package me.devld.tour.util;
import org.owasp.html.HtmlPolicyBuilder;
import org.owasp.html.HtmlStreamEventReceiver;
import org.owasp.html.HtmlStreamEventReceiverWrapper;
import org.owasp.html.PolicyFactory;
import org.springframework.util.StringUtils;
import java.util.*;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class HtmlUtils {
private static final Pattern COLOR_NAME = Pattern.compile(
+ "|red|silver|teal|white|yellow)");
// HTML/CSS Spec allows 3 or 6 digit hex to specify color
private static final Pattern COLOR_CODE = Pattern.compile(
private static final Pattern NUMBER_OR_PERCENT = Pattern.compile(
private static final Pattern PARAGRAPH = Pattern.compile(
private static final Pattern HTML_ID = Pattern.compile(
// force non-empty with a '+' at the end instead of '*'
private static final Pattern HTML_TITLE = Pattern.compile(
private static final Pattern HTML_CLASS = Pattern.compile(
private static final Pattern ONSITE_URL = Pattern.compile(
private static final Pattern OFFSITE_URL = Pattern.compile(
+ "[\\p{L}\\p{N}\\p{Zs}\\.\\#@\\$%\\+&;:\\-_~,\\?=/!\\(\\)]*+\\s*");
private static final Pattern NUMBER = Pattern.compile(
/**
 * One or more ASCII letters, digits, hyphens, underscores or dollar signs.
 * Used by {@code getBaseBuilder()} to constrain the {@code axis} and {@code headers} attributes.
 */
private static final Pattern NAME = Pattern.compile("[a-zA-Z0-9\\-_\\$]+");
private static final Pattern ALIGN = Pattern.compile(
private static final Pattern VALIGN = Pattern.compile(
/** Accepts a value that matches, in its entirety, either {@code COLOR_NAME} or {@code COLOR_CODE}. */
private static final Predicate<String> COLOR_NAME_OR_COLOR_CODE
= matchesEither(COLOR_NAME, COLOR_CODE);
/** Accepts a value that matches, in its entirety, either {@code ONSITE_URL} or {@code OFFSITE_URL}. */
private static final Predicate<String> ONSITE_OR_OFFSITE_URL
= matchesEither(ONSITE_URL, OFFSITE_URL);
private static final Pattern HISTORY_BACK = Pattern.compile(
/**
 * Matches the empty string or exactly one character. {@code DOTALL} makes {@code .} accept
 * line terminators as well, so a lone newline also matches.
 */
private static final Pattern ONE_CHAR = Pattern.compile(
".?", Pattern.DOTALL);
/**
 * Policy built once from {@code getBaseBuilder()} and reused by {@code sanitizer(String)}.
 */
// Static initializers run in textual order: keep this below the Pattern constants that
// getBaseBuilder() reads (NUMBER, NAME, NUMBER_OR_PERCENT), or they would still be null here.
private static final PolicyFactory POLICY = getBaseBuilder().toFactory();
/**
 * Builds a predicate that accepts a string when the whole string matches at least one of
 * the two given patterns.
 *
 * @param a pattern tried first
 * @param b pattern tried only when {@code a} does not match
 * @return a predicate that is true when {@code a} or {@code b} matches the entire input
 */
private static Predicate<String> matchesEither(
final Pattern a, final Pattern b) {
// matches() (unlike find()) succeeds only if the entire input matches the pattern.
return s -> a.matcher(s).matches() || b.matcher(s).matches();
/**
 * Case-insensitive match for an {@code <img ...>} tag: the literal {@code <img}, any run of
 * characters other than {@code <}, an optional {@code /}, then {@code >}.
 * NOTE(review): {@code [^<]*} is greedy and also consumes {@code >}, so the match extends to the
 * last {@code >} that precedes the next {@code <} (or the end of input) - confirm this is intended.
 */
private static final Pattern HTML_IMG_PATTERN = Pattern.compile("<img[^<]*/?>", Pattern.CASE_INSENSITIVE);
/**
 * Matches one {@code name=value} attribute. Group 1 is the attribute name (ASCII letters, digits,
 * hyphen, underscore); group 2 is the text between the quotes and is {@code null} when no quoted
 * value follows the {@code =}.
 * The name class spells out {@code A-Za-z} on purpose: the single range {@code A-z} would also
 * admit the ASCII punctuation characters located between 'Z' and 'a'.
 */
private static final Pattern HTML_ATTR_PATTERN = Pattern.compile("([A-Za-z0-9\\-_]+)=(?:[\"'](.*?)[\"'])?");
/**
 * Matches Markdown image syntax of the form {@code ![text](target)}. Group 1 is the text between
 * the square brackets and group 2 the text between the parentheses; both are matched lazily.
 */
private static final Pattern MARKDOWN_IMG_PATTERN = Pattern.compile("(?:!\\[(.*?)]\\((.*?)\\))");
/**
 * Sanitizes the given HTML text with the shared {@code POLICY}.
 *
 * @param raw text to sanitize
 * @return the result of {@code POLICY.sanitize(raw)}
 */
public static String sanitizer(String raw) {
return POLICY.sanitize(raw);
/**
 * Creates a new {@link HtmlPolicyBuilder} carrying the base attribute and element rules of this
 * class. A fresh builder is returned on every call: {@code POLICY} turns one into a factory and
 * {@code buildSanitizer} attaches a post-processor to another.
 *
 * <p>Attribute values are constrained with the Pattern constants of this class, for example
 * {@code NUMBER} for {@code colspan}/{@code rowspan}, {@code NUMBER_OR_PERCENT} for
 * {@code height}/{@code width} and {@code NAME} for {@code axis}/{@code headers}.
 *
 * @return a newly configured builder
 */
private static HtmlPolicyBuilder getBaseBuilder() {
return new HtmlPolicyBuilder()
.matching(Pattern.compile("[\\w;, \\-]+"))
.allowAttributes("border", "hspace", "vspace").matching(NUMBER)
.allowAttributes("border", "cellpadding", "cellspacing")
.onElements("td", "th", "tr")
.onElements("td", "th")
.onElements("td", "th")
.allowAttributes("axis", "headers").matching(NAME)
.onElements("td", "th")
.onElements("td", "th")
.onElements("td", "th")
.allowAttributes("height", "width").matching(NUMBER_OR_PERCENT)
.onElements("table", "td", "th", "tr", "img")
.onElements("thead", "tbody", "tfoot", "img",
"td", "th", "tr", "colgroup", "col")
.onElements("thead", "tbody", "tfoot",
"td", "th", "tr", "colgroup", "col")
.onElements("td", "th", "tr", "colgroup", "col",
"thead", "tbody", "tfoot")
.onElements("td", "th", "tr", "colgroup", "col",
"thead", "tbody", "tfoot")
.allowAttributes("colspan", "rowspan").matching(NUMBER)
.onElements("td", "th")
.allowAttributes("span", "width").matching(NUMBER_OR_PERCENT)
.onElements("colgroup", "col")
"a", "label", "noscript", "h1", "h2", "h3", "h4", "h5", "h6",
"p", "i", "b", "u", "strong", "em", "small", "big", "pre", "code",
"cite", "samp", "sub", "sup", "strike", "center", "blockquote",
"hr", "br", "col", "font", "map", "span", "div", "img",
"ul", "ol", "li", "dd", "dt", "dl", "tbody", "thead", "tfoot",
"table", "td", "th", "tr", "colgroup", "fieldset", "legend");
public static SanitizerBuilder buildSanitizer(HtmlTagProcessor processor) {
HtmlPolicyBuilder builder = getBaseBuilder().withPostprocessor(sink -> new HtmlStreamEventReceiverWrapper(sink) {
public void openTag(String elementName,