001    /**
002     * Copyright (c) 2000-present Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.util;
016    
017    import com.liferay.portal.kernel.security.pacl.DoPrivileged;
018    import com.liferay.portal.kernel.util.CharPool;
019    import com.liferay.portal.kernel.util.Html;
020    import com.liferay.portal.kernel.util.HttpUtil;
021    import com.liferay.portal.kernel.util.StringBundler;
022    import com.liferay.portal.kernel.util.StringPool;
023    import com.liferay.portal.kernel.util.StringUtil;
024    import com.liferay.portal.kernel.util.Validator;
025    
026    import java.util.HashMap;
027    import java.util.Map;
028    import java.util.regex.Matcher;
029    import java.util.regex.Pattern;
030    
031    import net.htmlparser.jericho.Renderer;
032    import net.htmlparser.jericho.Source;
033    import net.htmlparser.jericho.TextExtractor;
034    
035    /**
036     * Provides the implementation of the HTML utility interface for escaping,
037     * rendering, replacing, and stripping HTML text. This class uses XSS
038     * recommendations from <a
039     * href="http://www.owasp.org/index.php/Cross_Site_Scripting#How_to_Protect_Yourself">http://www.owasp.org/index.php/Cross_Site_Scripting#How_to_Protect_Yourself</a>
040     * when escaping HTML text.
041     *
042     * @author Brian Wing Shun Chan
043     * @author Clarence Shen
044     * @author Harry Mark
045     * @author Samuel Kong
046     * @author Connor McKay
047     * @author Shuyang Zhou
048     */
049    @DoPrivileged
050    public class HtmlImpl implements Html {
051    
        /**
         * Escape mode for HTML attribute values: unsafe characters are written
         * as hexadecimal character references (<code>&amp;#x..;</code>).
         */
        public static final int ESCAPE_MODE_ATTRIBUTE = 1;

        /**
         * Escape mode for CSS values: unsafe characters are written as a back
         * slash followed by their hexadecimal value.
         */
        public static final int ESCAPE_MODE_CSS = 2;

        /**
         * Escape mode for JavaScript values: unsafe characters are written as
         * <code>\x</code> followed by their hexadecimal value.
         */
        public static final int ESCAPE_MODE_JS = 3;

        /**
         * Escape mode for HTML text: handled by {@link #escape(String)}.
         */
        public static final int ESCAPE_MODE_TEXT = 4;

        /**
         * Escape mode for URLs: handled by <code>HttpUtil.encodeURL</code>.
         */
        public static final int ESCAPE_MODE_URL = 5;
061    
062            /**
063             * Generates a string with the data-* attributes generated from the keys and
064             * values of a map. For example, a map containing
065             * <code>{key1=value1;key2=value2}</code> is returned as the string
066             * <code>data-key1=value1 data-key2=value2</code>.
067             *
068             * @param  data the map of values to convert to data-* attributes
069             * @return a string with the data attributes, or <code>null</code> if the
070             *         map is <code>null</code>
071             */
072            @Override
073            public String buildData(Map<String, Object> data) {
074                    if ((data == null) || data.isEmpty()) {
075                            return StringPool.BLANK;
076                    }
077    
078                    StringBundler sb = new StringBundler(data.size() * 5);
079    
080                    for (Map.Entry<String, Object> entry : data.entrySet()) {
081                            sb.append("data-");
082                            sb.append(entry.getKey());
083                            sb.append("=\"");
084                            sb.append(escapeAttribute(String.valueOf(entry.getValue())));
085                            sb.append("\" ");
086                    }
087    
088                    return sb.toString();
089            }
090    
091            /**
092             * Escapes the text so that it is safe to use in an HTML context.
093             *
094             * @param  text the text to escape
095             * @return the escaped HTML text, or <code>null</code> if the text is
096             *         <code>null</code>
097             */
098            @Override
099            public String escape(String text) {
100                    if (text == null) {
101                            return null;
102                    }
103    
104                    if (text.length() == 0) {
105                            return StringPool.BLANK;
106                    }
107    
108                    // Escape using XSS recommendations from
109                    // http://www.owasp.org/index.php/Cross_Site_Scripting
110                    // #How_to_Protect_Yourself
111    
112                    StringBundler sb = null;
113    
114                    int lastReplacementIndex = 0;
115    
116                    for (int i = 0; i < text.length(); i++) {
117                            char c = text.charAt(i);
118    
119                            String replacement = null;
120    
121                            switch (c) {
122                                    case '<':
123                                            replacement = "&lt;";
124    
125                                            break;
126    
127                                    case '>':
128                                            replacement = "&gt;";
129    
130                                            break;
131    
132                                    case '&':
133                                            replacement = "&amp;";
134    
135                                            break;
136    
137                                    case '"':
138                                            replacement = "&#034;";
139    
140                                            break;
141    
142                                    case '\'':
143                                            replacement = "&#039;";
144    
145                                            break;
146    
147                                    case '\u00bb': // '�'
148                                            replacement = "&#187;";
149    
150                                            break;
151    
152                                    case '\u2013':
153                                            replacement = "&#x2013;";
154    
155                                            break;
156    
157                                    case '\u2014':
158                                            replacement = "&#x2014;";
159    
160                                            break;
161                            }
162    
163                            if (replacement != null) {
164                                    if (sb == null) {
165                                            sb = new StringBundler();
166                                    }
167    
168                                    if (i > lastReplacementIndex) {
169                                            sb.append(text.substring(lastReplacementIndex, i));
170                                    }
171    
172                                    sb.append(replacement);
173    
174                                    lastReplacementIndex = i + 1;
175                            }
176                    }
177    
178                    if (sb == null) {
179                            return text;
180                    }
181    
182                    if (lastReplacementIndex < text.length()) {
183                            sb.append(text.substring(lastReplacementIndex));
184                    }
185    
186                    return sb.toString();
187            }
188    
189            /**
190             * Escapes the input text as a hexadecimal value, based on the mode (type).
191             * The encoding types include: {@link #ESCAPE_MODE_ATTRIBUTE}, {@link
192             * #ESCAPE_MODE_CSS}, {@link #ESCAPE_MODE_JS}, {@link #ESCAPE_MODE_TEXT},
193             * and {@link #ESCAPE_MODE_URL}.
194             *
195             * <p>
196             * Note that <code>escape(text, ESCAPE_MODE_TEXT)</code> returns the same as
197             * <code>escape(text)</code>.
198             * </p>
199             *
200             * @param  text the text to escape
201             * @param  mode the encoding type
202             * @return the escaped hexadecimal value of the input text, based on the
203             *         mode, or <code>null</code> if the text is <code>null</code>
204             */
205            @Override
206            public String escape(String text, int mode) {
207                    if (text == null) {
208                            return null;
209                    }
210    
211                    if (text.length() == 0) {
212                            return StringPool.BLANK;
213                    }
214    
215                    String prefix = StringPool.BLANK;
216                    String postfix = StringPool.BLANK;
217    
218                    if (mode == ESCAPE_MODE_ATTRIBUTE) {
219                            prefix = "&#x";
220                            postfix = StringPool.SEMICOLON;
221                    }
222                    else if (mode == ESCAPE_MODE_CSS) {
223                            prefix = StringPool.BACK_SLASH;
224                    }
225                    else if (mode == ESCAPE_MODE_JS) {
226                            prefix = "\\x";
227                    }
228                    else if (mode == ESCAPE_MODE_URL) {
229                            return HttpUtil.encodeURL(text, true);
230                    }
231                    else {
232                            return escape(text);
233                    }
234    
235                    StringBuilder sb = new StringBuilder(text.length());
236    
237                    for (int i = 0; i < text.length(); i++) {
238                            char c = text.charAt(i);
239    
240                            if ((c > 255) || (c == CharPool.DASH) ||
241                                    (c == CharPool.UNDERLINE) || Character.isLetterOrDigit(c)) {
242    
243                                    sb.append(c);
244                            }
245                            else {
246                                    sb.append(prefix);
247    
248                                    String hexString = StringUtil.toHexString(c);
249    
250                                    if (hexString.length() == 1) {
251                                            sb.append(StringPool.ASCII_TABLE[48]);
252                                    }
253    
254                                    sb.append(hexString);
255                                    sb.append(postfix);
256                            }
257                    }
258    
259                    if (sb.length() == text.length()) {
260                            return text;
261                    }
262    
263                    return sb.toString();
264            }
265    
266            /**
267             * Escapes the attribute value so that it is safe to use as an attribute
268             * value.
269             *
270             * @param  attribute the attribute to escape
271             * @return the escaped attribute value, or <code>null</code> if the
272             *         attribute value is <code>null</code>
273             */
274            @Override
275            public String escapeAttribute(String attribute) {
276                    return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
277            }
278    
279            /**
280             * Escapes the CSS value so that it is safe to use in a CSS context.
281             *
282             * @param  css the CSS value to escape
283             * @return the escaped CSS value, or <code>null</code> if the CSS value is
284             *         <code>null</code>
285             */
286            @Override
287            public String escapeCSS(String css) {
288                    return escape(css, ESCAPE_MODE_CSS);
289            }
290    
291            /**
292             * Escapes the HREF attribute so that it is safe to use as an HREF
293             * attribute.
294             *
295             * @param  href the HREF attribute to escape
296             * @return the escaped HREF attribute, or <code>null</code> if the HREF
297             *         attribute is <code>null</code>
298             */
299            @Override
300            public String escapeHREF(String href) {
301                    if (href == null) {
302                            return null;
303                    }
304    
305                    if (href.length() == 0) {
306                            return StringPool.BLANK;
307                    }
308    
309                    int index = href.indexOf(StringPool.COLON);
310    
311                    if (index == 4) {
312                            String protocol = StringUtil.toLowerCase(href.substring(0, 4));
313    
314                            if (protocol.equals("data")) {
315                                    href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
316                            }
317                    }
318                    else if (index == 10) {
319                            String protocol = StringUtil.toLowerCase(href.substring(0, 10));
320    
321                            if (protocol.equals("javascript")) {
322                                    href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
323                            }
324                    }
325    
326                    return escapeAttribute(href);
327            }
328    
329            /**
330             * Escapes the JavaScript value so that it is safe to use in a JavaScript
331             * context.
332             *
333             * @param  js the JavaScript value to escape
334             * @return the escaped JavaScript value, or <code>null</code> if the
335             *         JavaScript value is <code>null</code>
336             */
337            @Override
338            public String escapeJS(String js) {
339                    return escape(js, ESCAPE_MODE_JS);
340            }
341    
342            @Override
343            public String escapeJSLink(String link) {
344                    if (Validator.isNull(link)) {
345                            return StringPool.BLANK;
346                    }
347    
348                    if (link.indexOf(StringPool.COLON) == 10) {
349                            String protocol = StringUtil.toLowerCase(link.substring(0, 10));
350    
351                            if (protocol.equals("javascript")) {
352                                    link = StringUtil.replaceFirst(link, StringPool.COLON, "%3a");
353                            }
354                    }
355    
356                    return link;
357            }
358    
359            /**
360             * Escapes the URL value so that it is safe to use as a URL.
361             *
362             * @param  url the URL value to escape
363             * @return the escaped URL value, or <code>null</code> if the URL value is
364             *         <code>null</code>
365             */
366            @Override
367            public String escapeURL(String url) {
368                    return escape(url, ESCAPE_MODE_URL);
369            }
370    
371            @Override
372            public String escapeXPath(String xPath) {
373                    if (Validator.isNull(xPath)) {
374                            return xPath;
375                    }
376    
377                    StringBuilder sb = new StringBuilder(xPath.length());
378    
379                    for (int i = 0; i < xPath.length(); i++) {
380                            char c = xPath.charAt(i);
381    
382                            boolean hasToken = false;
383    
384                            for (int j = 0; j < _XPATH_TOKENS.length; j++) {
385                                    if (c == _XPATH_TOKENS[j]) {
386                                            hasToken = true;
387    
388                                            break;
389                                    }
390                            }
391    
392                            if (hasToken) {
393                                    sb.append(StringPool.UNDERLINE);
394                            }
395                            else {
396                                    sb.append(c);
397                            }
398                    }
399    
400                    return sb.toString();
401            }
402    
403            @Override
404            public String escapeXPathAttribute(String xPathAttribute) {
405                    boolean hasApostrophe = xPathAttribute.contains(StringPool.APOSTROPHE);
406                    boolean hasQuote = xPathAttribute.contains(StringPool.QUOTE);
407    
408                    if (hasQuote && hasApostrophe) {
409                            String[] parts = xPathAttribute.split(StringPool.APOSTROPHE);
410    
411                            return "concat('".concat(
412                                    StringUtil.merge(parts, "', \"'\", '")).concat("')");
413                    }
414    
415                    if (hasQuote) {
416                            return StringPool.APOSTROPHE.concat(xPathAttribute).concat(
417                                    StringPool.APOSTROPHE);
418                    }
419    
420                    return StringPool.QUOTE.concat(xPathAttribute).concat(StringPool.QUOTE);
421            }
422    
423            /**
424             * Extracts the raw text from the HTML input, compressing its whitespace and
425             * removing all attributes, scripts, and styles.
426             *
427             * <p>
428             * For example, raw text returned by this method can be stored in a search
429             * index.
430             * </p>
431             *
432             * @param  html the HTML text
433             * @return the raw text from the HTML input, or <code>null</code> if the
434             *         HTML input is <code>null</code>
435             */
436            @Override
437            public String extractText(String html) {
438                    if (html == null) {
439                            return null;
440                    }
441    
442                    Source source = new Source(html);
443    
444                    TextExtractor textExtractor = source.getTextExtractor();
445    
446                    return textExtractor.toString();
447            }
448    
449            @Override
450            public String fromInputSafe(String text) {
451                    return StringUtil.replace(text, "&amp;", "&");
452            }
453    
454            @Override
455            public String getAUICompatibleId(String text) {
456                    if (Validator.isNull(text)) {
457                            return text;
458                    }
459    
460                    StringBundler sb = null;
461    
462                    int lastReplacementIndex = 0;
463    
464                    for (int i = 0; i < text.length(); i++) {
465                            char c = text.charAt(i);
466    
467                            if (((c <= 127) && (Validator.isChar(c) || Validator.isDigit(c))) ||
468                                    ((c > 127) && (c != CharPool.FIGURE_SPACE) &&
469                                     (c != CharPool.NARROW_NO_BREAK_SPACE) &&
470                                     (c != CharPool.NO_BREAK_SPACE))) {
471    
472                                    continue;
473                            }
474    
475                            if (sb == null) {
476                                    sb = new StringBundler();
477                            }
478    
479                            if (i > lastReplacementIndex) {
480                                    sb.append(text.substring(lastReplacementIndex, i));
481                            }
482    
483                            sb.append(CharPool.UNDERLINE);
484    
485                            if (c != CharPool.UNDERLINE) {
486                                    sb.append(StringUtil.toHexString(c));
487                            }
488    
489                            sb.append(CharPool.UNDERLINE);
490    
491                            lastReplacementIndex = i + 1;
492                    }
493    
494                    if (sb == null) {
495                            return text;
496                    }
497    
498                    if (lastReplacementIndex < text.length()) {
499                            sb.append(text.substring(lastReplacementIndex));
500                    }
501    
502                    return sb.toString();
503            }
504    
505            /**
506             * Renders the HTML content into text. This provides a human readable
507             * version of the content that is modeled on the way Mozilla
508             * Thunderbird&reg; and other email clients provide an automatic conversion
509             * of HTML content to text in their alternative MIME encoding of emails.
510             *
511             * <p>
512             * Using the default settings, the output complies with the
513             * <code>Text/Plain; Format=Flowed (DelSp=No)</code> protocol described in
514             * <a href="http://tools.ietf.org/html/rfc3676">RFC-3676</a>.
515             * </p>
516             *
517             * @param  html the HTML text
518             * @return the rendered HTML text, or <code>null</code> if the HTML text is
519             *         <code>null</code>
520             */
521            @Override
522            public String render(String html) {
523                    if (html == null) {
524                            return null;
525                    }
526    
527                    Source source = new Source(html);
528    
529                    Renderer renderer = source.getRenderer();
530    
531                    return renderer.toString();
532            }
533    
534            /**
535             * Replaces all Microsoft&reg; Word Unicode characters with plain HTML
536             * entities or characters.
537             *
538             * @param      text the text
539             * @return     the converted text, or <code>null</code> if the text is
540             *             <code>null</code>
541             * @deprecated As of 7.0.0, with no direct replacement
542             */
543            @Deprecated
544            @Override
545            public String replaceMsWordCharacters(String text) {
546                    return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
547            }
548    
549            /**
550             * Replaces all new lines or carriage returns with the <code><br /></code>
551             * HTML tag.
552             *
553             * @param  html the text
554             * @return the converted text, or <code>null</code> if the text is
555             *         <code>null</code>
556             */
557            @Override
558            public String replaceNewLine(String html) {
559                    if (html == null) {
560                            return null;
561                    }
562    
563                    html = StringUtil.replace(html, StringPool.RETURN_NEW_LINE, "<br />");
564    
565                    return StringUtil.replace(html, StringPool.NEW_LINE, "<br />");
566            }
567    
568            /**
569             * Strips all content delimited by the tag out of the text.
570             *
571             * <p>
572             * If the tag appears multiple times, all occurrences (including the tag)
573             * are stripped. The tag may have attributes. In order for this method to
574             * recognize the tag, it must consist of a separate opening and closing tag.
575             * Self-closing tags remain in the result.
576             * </p>
577             *
578             * @param  text the text
579             * @param  tag the tag used for delimiting, which should only be the tag's
580             *         name (e.g. no &lt;)
581             * @return the text, without the stripped tag and its contents, or
582             *         <code>null</code> if the text is <code>null</code>
583             */
584            @Override
585            public String stripBetween(String text, String tag) {
586                    return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
587            }
588    
589            /**
590             * Strips all XML comments out of the text.
591             *
592             * @param  text the text
593             * @return the text, without the stripped XML comments, or <code>null</code>
594             *         if the text is <code>null</code>
595             */
596            @Override
597            public String stripComments(String text) {
598                    return StringUtil.stripBetween(text, "<!--", "-->");
599            }
600    
601            @Override
602            public String stripHtml(String text) {
603                    if (text == null) {
604                            return null;
605                    }
606    
607                    text = stripComments(text);
608    
609                    StringBuilder sb = new StringBuilder(text.length());
610    
611                    int x = 0;
612                    int y = text.indexOf("<");
613    
614                    while (y != -1) {
615                            sb.append(text.substring(x, y));
616                            sb.append(StringPool.SPACE);
617    
618                            // Look for text enclosed by <abc></abc>
619    
620                            if (isTag(_TAG_SCRIPT, text, y + 1)) {
621                                    y = stripTag(_TAG_SCRIPT, text, y);
622                            }
623                            else if (isTag(_TAG_STYLE, text, y + 1)) {
624                                    y = stripTag(_TAG_STYLE, text, y);
625                            }
626    
627                            x = text.indexOf(">", y);
628    
629                            if (x == -1) {
630                                    break;
631                            }
632    
633                            x++;
634    
635                            if (x < y) {
636    
637                                    // <b>Hello</b
638    
639                                    break;
640                            }
641    
642                            y = text.indexOf("<", x);
643                    }
644    
645                    if (y == -1) {
646                            sb.append(text.substring(x));
647                    }
648    
649                    return sb.toString();
650            }
651    
652            /**
653             * Encodes the text so that it's safe to use as an HTML input field value.
654             *
655             * <p>
656             * For example, the <code>&</code> character is replaced by
657             * <code>&amp;amp;</code>.
658             * </p>
659             *
660             * @param  text the text
661             * @return the encoded text that is safe to use as an HTML input field
662             *         value, or <code>null</code> if the text is <code>null</code>
663             */
664            @Override
665            public String toInputSafe(String text) {
666                    return StringUtil.replace(
667                            text,
668                            new String[] {"&", "\""},
669                            new String[] {"&amp;", "&quot;"});
670            }
671    
672            @Override
673            public String unescape(String text) {
674                    return StringUtil.replace(text, "&", ";", _unescapeMap);
675            }
676    
677            @Override
678            public String unescapeCDATA(String text) {
679                    if (text == null) {
680                            return null;
681                    }
682    
683                    if (text.length() == 0) {
684                            return StringPool.BLANK;
685                    }
686    
687                    text = StringUtil.replace(text, "&lt;![CDATA[", "<![CDATA[");
688                    text = StringUtil.replace(text, "]]&gt;", "]]>");
689    
690                    return text;
691            }
692    
693            @Override
694            public String wordBreak(String text, int columns) {
695                    StringBundler sb = new StringBundler();
696    
697                    int length = 0;
698                    int lastWrite = 0;
699                    int pos = 0;
700    
701                    Matcher matcher = _pattern.matcher(text);
702    
703                    while (matcher.find()) {
704                            if (matcher.start() < pos) {
705                                    continue;
706                            }
707    
708                            while ((length + matcher.start() - pos) >= columns) {
709                                    pos += columns - length;
710    
711                                    sb.append(text.substring(lastWrite, pos));
712                                    sb.append("<wbr/>&shy;");
713    
714                                    length = 0;
715                                    lastWrite = pos;
716                            }
717    
718                            length += matcher.start() - pos;
719    
720                            String group = matcher.group();
721    
722                            if (group.equals(StringPool.AMPERSAND)) {
723                                    int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
724    
725                                    if (x != -1) {
726                                            length++;
727                                            pos = x + 1;
728                                    }
729    
730                                    continue;
731                            }
732    
733                            if (group.equals(StringPool.LESS_THAN)) {
734                                    int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
735    
736                                    if (x != -1) {
737                                            pos = x + 1;
738                                    }
739    
740                                    continue;
741                            }
742    
743                            if (group.equals(StringPool.SPACE) ||
744                                    group.equals(StringPool.NEW_LINE)) {
745    
746                                    length = 0;
747                                    pos = matcher.start() + 1;
748                            }
749                    }
750    
751                    sb.append(text.substring(lastWrite));
752    
753                    return sb.toString();
754            }
755    
756            protected boolean isTag(char[] tag, String text, int pos) {
757                    if ((pos + tag.length + 1) <= text.length()) {
758                            char item = '\0';
759    
760                            for (int i = 0; i < tag.length; i++) {
761                                    item = text.charAt(pos++);
762    
763                                    if (Character.toLowerCase(item) != tag[i]) {
764                                            return false;
765                                    }
766                            }
767    
768                            item = text.charAt(pos);
769    
770                            // Check that char after tag is not a letter (i.e. another tag)
771    
772                            return !Character.isLetter(item);
773                    }
774                    else {
775                            return false;
776                    }
777            }
778    
779            protected int stripTag(char[] tag, String text, int pos) {
780                    int x = pos + _TAG_SCRIPT.length;
781    
782                    // Find end of the tag
783    
784                    x = text.indexOf(">", x);
785    
786                    if (x < 0) {
787                            return pos;
788                    }
789    
790                    // Check if preceding character is / (i.e. is this instance of <abc/>)
791    
792                    if (text.charAt(x-1) == '/') {
793                            return pos;
794                    }
795    
796                    // Search for the ending </abc> tag
797    
798                    while (true) {
799                            x = text.indexOf("</", x);
800    
801                            if (x >= 0) {
802                                    if (isTag(tag, text, x + 2)) {
803                                            pos = x;
804    
805                                            break;
806                                    }
807                                    else {
808    
809                                            // Skip past "</"
810    
811                                            x += 2;
812                                    }
813                            }
814                            else {
815                                    break;
816                            }
817                    }
818    
819                    return pos;
820            }
821    
        // Replacements for the characters of _MS_WORD_UNICODE, in the same order

        private static final String[] _MS_WORD_HTML = new String[] {
                "&reg;", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
        };

        // Registered sign, right single quotation mark, left double quotation
        // mark, and right double quotation mark

        private static final String[] _MS_WORD_UNICODE = new String[] {
                "\u00ae", "\u2019", "\u201c", "\u201d"
        };

        // Lower case tag names of the elements that stripHtml removes together
        // with their content

        private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};

        private static final char[] _TAG_STYLE = {'s', 't', 'y', 'l', 'e'};

        // See http://www.w3.org/TR/xpath20/#lexical-structure

        // Characters that escapeXPath replaces with an underline. The numeric
        // entries are tab (9), line feed (10), carriage return (13), next line
        // (133), and line separator (8232).
        //
        // NOTE(review): the double quote is listed twice and the apostrophe is
        // missing; possibly one of the two was meant to be '\'' -- confirm

        private static final char[] _XPATH_TOKENS = {
                '(', ')', '[', ']', '.', '@', ',', ':', '/', '|', '+', '-', '=', '!',
                '<', '>', '*', '$', '"', '"', ' ', 9, 10, 13, 133, 8232
        };

        // Lookup table used by unescape. The keys are entity names without the
        // leading ampersand and the trailing semicolon.

        private static final Map<String, String> _unescapeMap = new HashMap<>();

        static {
                _unescapeMap.put("lt", "<");
                _unescapeMap.put("gt", ">");
                _unescapeMap.put("amp", "&");
                _unescapeMap.put("rsquo", "\u2019");
                _unescapeMap.put("#034", "\"");
                _unescapeMap.put("#039", "'");
                _unescapeMap.put("#040", "(");
                _unescapeMap.put("#041", ")");
                _unescapeMap.put("#044", ",");
                _unescapeMap.put("#035", "#");
                _unescapeMap.put("#037", "%");
                _unescapeMap.put("#059", ";");
                _unescapeMap.put("#061", "=");
                _unescapeMap.put("#043", "+");
                _unescapeMap.put("#045", "-");
        }

        // Used by wordBreak. Matches a single whitespace character, "<", "&", or
        // the end of the input.

        private final Pattern _pattern = Pattern.compile("([\\s<&]|$)");
862    
863    }