001    /**
002     * Copyright (c) 2000-present Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.util;
016    
017    import com.liferay.portal.kernel.security.pacl.DoPrivileged;
018    import com.liferay.portal.kernel.util.CharPool;
019    import com.liferay.portal.kernel.util.Html;
020    import com.liferay.portal.kernel.util.HttpUtil;
021    import com.liferay.portal.kernel.util.StringBundler;
022    import com.liferay.portal.kernel.util.StringPool;
023    import com.liferay.portal.kernel.util.StringUtil;
024    import com.liferay.portal.kernel.util.Validator;
025    
026    import java.util.HashMap;
027    import java.util.Map;
028    import java.util.regex.Matcher;
029    import java.util.regex.Pattern;
030    
031    import net.htmlparser.jericho.Renderer;
032    import net.htmlparser.jericho.Source;
033    import net.htmlparser.jericho.TextExtractor;
034    
035    /**
036     * Provides the implementation of the HTML utility interface for escaping,
037     * rendering, replacing, and stripping HTML text. This class uses XSS
038     * recommendations from <a
039     * href="http://www.owasp.org/index.php/Cross_Site_Scripting#How_to_Protect_Yourself">http://www.owasp.org/index.php/Cross_Site_Scripting#How_to_Protect_Yourself</a>
040     * when escaping HTML text.
041     *
042     * @author Brian Wing Shun Chan
043     * @author Clarence Shen
044     * @author Harry Mark
045     * @author Samuel Kong
046     * @author Connor McKay
047     * @author Shuyang Zhou
048     */
049    @DoPrivileged
050    public class HtmlImpl implements Html {
051    
        /**
         * Escape mode for {@link #escape(String, int)} that encodes characters as
         * hexadecimal HTML character references of the form
         * <code>&amp;#xHH;</code>.
         */
        public static final int ESCAPE_MODE_ATTRIBUTE = 1;

        /**
         * Escape mode for {@link #escape(String, int)} that encodes characters as
         * a backslash followed by their hexadecimal value.
         */
        public static final int ESCAPE_MODE_CSS = 2;

        /**
         * Escape mode for {@link #escape(String, int)} that encodes characters as
         * <code>\xHH</code> sequences.
         */
        public static final int ESCAPE_MODE_JS = 3;

        /**
         * Escape mode for {@link #escape(String, int)} that delegates to
         * {@link #escape(String)}.
         */
        public static final int ESCAPE_MODE_TEXT = 4;

        /**
         * Escape mode for {@link #escape(String, int)} that delegates to
         * <code>HttpUtil.encodeURL(text, true)</code>.
         */
        public static final int ESCAPE_MODE_URL = 5;
061    
062            /**
063             * Generates a string with the data-* attributes generated from the keys and
064             * values of a map. For example, a map containing
065             * <code>{key1=value1;key2=value2}</code> is returned as the string
066             * <code>data-key1=value1 data-key2=value2</code>.
067             *
068             * @param  data the map of values to convert to data-* attributes
069             * @return a string with the data attributes, or <code>null</code> if the
070             *         map is <code>null</code>
071             */
072            @Override
073            public String buildData(Map<String, Object> data) {
074                    if ((data == null) || data.isEmpty()) {
075                            return StringPool.BLANK;
076                    }
077    
078                    StringBundler sb = new StringBundler(data.size() * 5);
079    
080                    for (Map.Entry<String, Object> entry : data.entrySet()) {
081                            sb.append("data-");
082                            sb.append(entry.getKey());
083                            sb.append("=\"");
084                            sb.append(escapeAttribute(String.valueOf(entry.getValue())));
085                            sb.append("\" ");
086                    }
087    
088                    return sb.toString();
089            }
090    
091            /**
092             * Escapes the text so that it is safe to use in an HTML context.
093             *
094             * @param  text the text to escape
095             * @return the escaped HTML text, or <code>null</code> if the text is
096             *         <code>null</code>
097             */
098            @Override
099            public String escape(String text) {
100                    if (text == null) {
101                            return null;
102                    }
103    
104                    if (text.length() == 0) {
105                            return StringPool.BLANK;
106                    }
107    
108                    // Escape using XSS recommendations from
109                    // http://www.owasp.org/index.php/Cross_Site_Scripting
110                    // #How_to_Protect_Yourself
111    
112                    StringBundler sb = null;
113    
114                    int lastReplacementIndex = 0;
115    
116                    for (int i = 0; i < text.length(); i++) {
117                            char c = text.charAt(i);
118    
119                            String replacement = null;
120    
121                            switch (c) {
122                                    case '<':
123                                            replacement = "&lt;";
124    
125                                            break;
126    
127                                    case '>':
128                                            replacement = "&gt;";
129    
130                                            break;
131    
132                                    case '&':
133                                            replacement = "&amp;";
134    
135                                            break;
136    
137                                    case '"':
138                                            replacement = "&#034;";
139    
140                                            break;
141    
142                                    case '\'':
143                                            replacement = "&#039;";
144    
145                                            break;
146    
147                                    case '\u00bb': // '???'
148                                            replacement = "&#187;";
149    
150                                            break;
151    
152                                    case '\u2013':
153                                            replacement = "&#x2013;";
154    
155                                            break;
156    
157                                    case '\u2014':
158                                            replacement = "&#x2014;";
159    
160                                            break;
161                            }
162    
163                            if (replacement != null) {
164                                    if (sb == null) {
165                                            sb = new StringBundler();
166                                    }
167    
168                                    if (i > lastReplacementIndex) {
169                                            sb.append(text.substring(lastReplacementIndex, i));
170                                    }
171    
172                                    sb.append(replacement);
173    
174                                    lastReplacementIndex = i + 1;
175                            }
176                    }
177    
178                    if (sb == null) {
179                            return text;
180                    }
181    
182                    if (lastReplacementIndex < text.length()) {
183                            sb.append(text.substring(lastReplacementIndex));
184                    }
185    
186                    return sb.toString();
187            }
188    
189            /**
190             * Escapes the input text as a hexadecimal value, based on the mode (type).
191             * The encoding types include: {@link #ESCAPE_MODE_ATTRIBUTE}, {@link
192             * #ESCAPE_MODE_CSS}, {@link #ESCAPE_MODE_JS}, {@link #ESCAPE_MODE_TEXT},
193             * and {@link #ESCAPE_MODE_URL}.
194             *
195             * <p>
196             * Note that <code>escape(text, ESCAPE_MODE_TEXT)</code> returns the same as
197             * <code>escape(text)</code>.
198             * </p>
199             *
200             * @param  text the text to escape
201             * @param  mode the encoding type
202             * @return the escaped hexadecimal value of the input text, based on the
203             *         mode, or <code>null</code> if the text is <code>null</code>
204             */
205            @Override
206            public String escape(String text, int mode) {
207                    if (text == null) {
208                            return null;
209                    }
210    
211                    if (text.length() == 0) {
212                            return StringPool.BLANK;
213                    }
214    
215                    String prefix = StringPool.BLANK;
216                    String postfix = StringPool.BLANK;
217    
218                    if (mode == ESCAPE_MODE_ATTRIBUTE) {
219                            prefix = "&#x";
220                            postfix = StringPool.SEMICOLON;
221                    }
222                    else if (mode == ESCAPE_MODE_CSS) {
223                            prefix = StringPool.BACK_SLASH;
224                    }
225                    else if (mode == ESCAPE_MODE_JS) {
226                            prefix = "\\x";
227                    }
228                    else if (mode == ESCAPE_MODE_URL) {
229                            return HttpUtil.encodeURL(text, true);
230                    }
231                    else {
232                            return escape(text);
233                    }
234    
235                    StringBuilder sb = new StringBuilder();
236    
237                    for (int i = 0; i < text.length(); i++) {
238                            char c = text.charAt(i);
239    
240                            if ((c > 255) || Character.isLetterOrDigit(c) ||
241                                    (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
242    
243                                    sb.append(c);
244                            }
245                            else {
246                                    sb.append(prefix);
247    
248                                    String hexString = StringUtil.toHexString(c);
249    
250                                    if (hexString.length() == 1) {
251                                            sb.append(StringPool.ASCII_TABLE[48]);
252                                    }
253    
254                                    sb.append(hexString);
255                                    sb.append(postfix);
256                            }
257                    }
258    
259                    if (sb.length() == text.length()) {
260                            return text;
261                    }
262                    else {
263                            return sb.toString();
264                    }
265            }
266    
267            /**
268             * Escapes the attribute value so that it is safe to use as an attribute
269             * value.
270             *
271             * @param  attribute the attribute to escape
272             * @return the escaped attribute value, or <code>null</code> if the
273             *         attribute value is <code>null</code>
274             */
275            @Override
276            public String escapeAttribute(String attribute) {
277                    return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
278            }
279    
280            /**
281             * Escapes the CSS value so that it is safe to use in a CSS context.
282             *
283             * @param  css the CSS value to escape
284             * @return the escaped CSS value, or <code>null</code> if the CSS value is
285             *         <code>null</code>
286             */
287            @Override
288            public String escapeCSS(String css) {
289                    return escape(css, ESCAPE_MODE_CSS);
290            }
291    
292            /**
293             * Escapes the HREF attribute so that it is safe to use as an HREF
294             * attribute.
295             *
296             * @param  href the HREF attribute to escape
297             * @return the escaped HREF attribute, or <code>null</code> if the HREF
298             *         attribute is <code>null</code>
299             */
300            @Override
301            public String escapeHREF(String href) {
302                    if (href == null) {
303                            return null;
304                    }
305    
306                    if (href.length() == 0) {
307                            return StringPool.BLANK;
308                    }
309    
310                    int index = href.indexOf(StringPool.COLON);
311    
312                    if (index == 4) {
313                            String protocol = StringUtil.toLowerCase(href.substring(0, 4));
314    
315                            if (protocol.equals("data")) {
316                                    href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
317                            }
318                    }
319                    else if (index == 10) {
320                            String protocol = StringUtil.toLowerCase(href.substring(0, 10));
321    
322                            if (protocol.equals("javascript")) {
323                                    href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
324                            }
325                    }
326    
327                    return escapeAttribute(href);
328            }
329    
330            /**
331             * Escapes the JavaScript value so that it is safe to use in a JavaScript
332             * context.
333             *
334             * @param  js the JavaScript value to escape
335             * @return the escaped JavaScript value, or <code>null</code> if the
336             *         JavaScript value is <code>null</code>
337             */
338            @Override
339            public String escapeJS(String js) {
340                    return escape(js, ESCAPE_MODE_JS);
341            }
342    
343            @Override
344            public String escapeJSLink(String link) {
345                    if (Validator.isNull(link)) {
346                            return StringPool.BLANK;
347                    }
348    
349                    if (link.indexOf(StringPool.COLON) == 10) {
350                            String protocol = StringUtil.toLowerCase(link.substring(0, 10));
351    
352                            if (protocol.equals("javascript")) {
353                                    link = StringUtil.replaceFirst(link, StringPool.COLON, "%3a");
354                            }
355                    }
356    
357                    return link;
358            }
359    
360            /**
361             * Escapes the URL value so that it is safe to use as a URL.
362             *
363             * @param  url the URL value to escape
364             * @return the escaped URL value, or <code>null</code> if the URL value is
365             *         <code>null</code>
366             */
367            @Override
368            public String escapeURL(String url) {
369                    return escape(url, ESCAPE_MODE_URL);
370            }
371    
372            @Override
373            public String escapeXPath(String xPath) {
374                    if (Validator.isNull(xPath)) {
375                            return xPath;
376                    }
377    
378                    StringBuilder sb = new StringBuilder(xPath.length());
379    
380                    for (int i = 0; i < xPath.length(); i++) {
381                            char c = xPath.charAt(i);
382    
383                            boolean hasToken = false;
384    
385                            for (int j = 0; j < _XPATH_TOKENS.length; j++) {
386                                    if (c == _XPATH_TOKENS[j]) {
387                                            hasToken = true;
388    
389                                            break;
390                                    }
391                            }
392    
393                            if (hasToken) {
394                                    sb.append(StringPool.UNDERLINE);
395                            }
396                            else {
397                                    sb.append(c);
398                            }
399                    }
400    
401                    return sb.toString();
402            }
403    
404            @Override
405            public String escapeXPathAttribute(String xPathAttribute) {
406                    boolean hasApostrophe = xPathAttribute.contains(StringPool.APOSTROPHE);
407                    boolean hasQuote = xPathAttribute.contains(StringPool.QUOTE);
408    
409                    if (hasQuote && hasApostrophe) {
410                            String[] parts = xPathAttribute.split(StringPool.APOSTROPHE);
411    
412                            return "concat('".concat(
413                                    StringUtil.merge(parts, "', \"'\", '")).concat("')");
414                    }
415    
416                    if (hasQuote) {
417                            return StringPool.APOSTROPHE.concat(xPathAttribute).concat(
418                                    StringPool.APOSTROPHE);
419                    }
420    
421                    return StringPool.QUOTE.concat(xPathAttribute).concat(StringPool.QUOTE);
422            }
423    
424            /**
425             * Extracts the raw text from the HTML input, compressing its whitespace and
426             * removing all attributes, scripts, and styles.
427             *
428             * <p>
429             * For example, raw text returned by this method can be stored in a search
430             * index.
431             * </p>
432             *
433             * @param  html the HTML text
434             * @return the raw text from the HTML input, or <code>null</code> if the
435             *         HTML input is <code>null</code>
436             */
437            @Override
438            public String extractText(String html) {
439                    if (html == null) {
440                            return null;
441                    }
442    
443                    Source source = new Source(html);
444    
445                    TextExtractor textExtractor = source.getTextExtractor();
446    
447                    return textExtractor.toString();
448            }
449    
450            @Override
451            public String fromInputSafe(String text) {
452                    return StringUtil.replace(text, "&amp;", "&");
453            }
454    
455            @Override
456            public String getAUICompatibleId(String text) {
457                    if (Validator.isNull(text)) {
458                            return text;
459                    }
460    
461                    StringBundler sb = null;
462    
463                    int lastReplacementIndex = 0;
464    
465                    for (int i = 0; i < text.length(); i++) {
466                            char c = text.charAt(i);
467    
468                            if (((c <= 127) && (Validator.isChar(c) || Validator.isDigit(c))) ||
469                                    ((c > 127) && (c != CharPool.FIGURE_SPACE) &&
470                                     (c != CharPool.NARROW_NO_BREAK_SPACE) &&
471                                     (c != CharPool.NO_BREAK_SPACE))) {
472    
473                                    continue;
474                            }
475    
476                            if (sb == null) {
477                                    sb = new StringBundler();
478                            }
479    
480                            if (i > lastReplacementIndex) {
481                                    sb.append(text.substring(lastReplacementIndex, i));
482                            }
483    
484                            sb.append(CharPool.UNDERLINE);
485    
486                            if (c != CharPool.UNDERLINE) {
487                                    sb.append(StringUtil.toHexString(c));
488                            }
489    
490                            sb.append(CharPool.UNDERLINE);
491    
492                            lastReplacementIndex = i + 1;
493                    }
494    
495                    if (sb == null) {
496                            return text;
497                    }
498    
499                    if (lastReplacementIndex < text.length()) {
500                            sb.append(text.substring(lastReplacementIndex));
501                    }
502    
503                    return sb.toString();
504            }
505    
506            /**
507             * Renders the HTML content into text. This provides a human readable
508             * version of the content that is modeled on the way Mozilla
509             * Thunderbird&reg; and other email clients provide an automatic conversion
510             * of HTML content to text in their alternative MIME encoding of emails.
511             *
512             * <p>
513             * Using the default settings, the output complies with the
514             * <code>Text/Plain; Format=Flowed (DelSp=No)</code> protocol described in
515             * <a href="http://tools.ietf.org/html/rfc3676">RFC-3676</a>.
516             * </p>
517             *
518             * @param  html the HTML text
519             * @return the rendered HTML text, or <code>null</code> if the HTML text is
520             *         <code>null</code>
521             */
522            @Override
523            public String render(String html) {
524                    if (html == null) {
525                            return null;
526                    }
527    
528                    Source source = new Source(html);
529    
530                    Renderer renderer = source.getRenderer();
531    
532                    return renderer.toString();
533            }
534    
535            /**
536             * Replaces all Microsoft&reg; Word Unicode characters with plain HTML
537             * entities or characters.
538             *
539             * @param      text the text
540             * @return     the converted text, or <code>null</code> if the text is
541             *             <code>null</code>
542             * @deprecated As of 7.0.0, with no direct replacement
543             */
544            @Deprecated
545            @Override
546            public String replaceMsWordCharacters(String text) {
547                    return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
548            }
549    
550            /**
551             * Replaces all new lines or carriage returns with the <code><br /></code>
552             * HTML tag.
553             *
554             * @param  html the text
555             * @return the converted text, or <code>null</code> if the text is
556             *         <code>null</code>
557             */
558            @Override
559            public String replaceNewLine(String html) {
560                    if (html == null) {
561                            return null;
562                    }
563    
564                    html = StringUtil.replace(html, StringPool.RETURN_NEW_LINE, "<br />");
565    
566                    return StringUtil.replace(html, StringPool.NEW_LINE, "<br />");
567            }
568    
569            /**
570             * Strips all content delimited by the tag out of the text.
571             *
572             * <p>
573             * If the tag appears multiple times, all occurrences (including the tag)
574             * are stripped. The tag may have attributes. In order for this method to
575             * recognize the tag, it must consist of a separate opening and closing tag.
576             * Self-closing tags remain in the result.
577             * </p>
578             *
579             * @param  text the text
580             * @param  tag the tag used for delimiting, which should only be the tag's
581             *         name (e.g. no &lt;)
582             * @return the text, without the stripped tag and its contents, or
583             *         <code>null</code> if the text is <code>null</code>
584             */
585            @Override
586            public String stripBetween(String text, String tag) {
587                    return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
588            }
589    
590            /**
591             * Strips all XML comments out of the text.
592             *
593             * @param  text the text
594             * @return the text, without the stripped XML comments, or <code>null</code>
595             *         if the text is <code>null</code>
596             */
597            @Override
598            public String stripComments(String text) {
599                    return StringUtil.stripBetween(text, "<!--", "-->");
600            }
601    
602            @Override
603            public String stripHtml(String text) {
604                    if (text == null) {
605                            return null;
606                    }
607    
608                    text = stripComments(text);
609    
610                    StringBuilder sb = new StringBuilder(text.length());
611    
612                    int x = 0;
613                    int y = text.indexOf("<");
614    
615                    while (y != -1) {
616                            sb.append(text.substring(x, y));
617                            sb.append(StringPool.SPACE);
618    
619                            // Look for text enclosed by <abc></abc>
620    
621                            if (isTag(_TAG_SCRIPT, text, y + 1)) {
622                                    y = stripTag(_TAG_SCRIPT, text, y);
623                            }
624                            else if (isTag(_TAG_STYLE, text, y + 1)) {
625                                    y = stripTag(_TAG_STYLE, text, y);
626                            }
627    
628                            x = text.indexOf(">", y);
629    
630                            if (x == -1) {
631                                    break;
632                            }
633    
634                            x++;
635    
636                            if (x < y) {
637    
638                                    // <b>Hello</b
639    
640                                    break;
641                            }
642    
643                            y = text.indexOf("<", x);
644                    }
645    
646                    if (y == -1) {
647                            sb.append(text.substring(x));
648                    }
649    
650                    return sb.toString();
651            }
652    
653            /**
654             * Encodes the text so that it's safe to use as an HTML input field value.
655             *
656             * <p>
657             * For example, the <code>&</code> character is replaced by
658             * <code>&amp;amp;</code>.
659             * </p>
660             *
661             * @param  text the text
662             * @return the encoded text that is safe to use as an HTML input field
663             *         value, or <code>null</code> if the text is <code>null</code>
664             */
665            @Override
666            public String toInputSafe(String text) {
667                    return StringUtil.replace(
668                            text,
669                            new String[] {"&", "\""},
670                            new String[] {"&amp;", "&quot;"});
671            }
672    
673            @Override
674            public String unescape(String text) {
675                    return StringUtil.replace(text, "&", ";", _unescapeMap);
676            }
677    
678            @Override
679            public String unescapeCDATA(String text) {
680                    if (text == null) {
681                            return null;
682                    }
683    
684                    if (text.length() == 0) {
685                            return StringPool.BLANK;
686                    }
687    
688                    text = StringUtil.replace(text, "&lt;![CDATA[", "<![CDATA[");
689                    text = StringUtil.replace(text, "]]&gt;", "]]>");
690    
691                    return text;
692            }
693    
694            @Override
695            public String wordBreak(String text, int columns) {
696                    StringBundler sb = new StringBundler();
697    
698                    int length = 0;
699                    int lastWrite = 0;
700                    int pos = 0;
701    
702                    Matcher matcher = _pattern.matcher(text);
703    
704                    while (matcher.find()) {
705                            if (matcher.start() < pos) {
706                                    continue;
707                            }
708    
709                            while ((length + matcher.start() - pos) >= columns) {
710                                    pos += columns - length;
711    
712                                    sb.append(text.substring(lastWrite, pos));
713                                    sb.append("<wbr/>&shy;");
714    
715                                    length = 0;
716                                    lastWrite = pos;
717                            }
718    
719                            length += matcher.start() - pos;
720    
721                            String group = matcher.group();
722    
723                            if (group.equals(StringPool.AMPERSAND)) {
724                                    int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
725    
726                                    if (x != -1) {
727                                            length++;
728                                            pos = x + 1;
729                                    }
730    
731                                    continue;
732                            }
733    
734                            if (group.equals(StringPool.LESS_THAN)) {
735                                    int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
736    
737                                    if (x != -1) {
738                                            pos = x + 1;
739                                    }
740    
741                                    continue;
742                            }
743    
744                            if (group.equals(StringPool.SPACE) ||
745                                    group.equals(StringPool.NEW_LINE)) {
746    
747                                    length = 0;
748                                    pos = matcher.start() + 1;
749                            }
750                    }
751    
752                    sb.append(text.substring(lastWrite));
753    
754                    return sb.toString();
755            }
756    
757            protected boolean isTag(char[] tag, String text, int pos) {
758                    if ((pos + tag.length + 1) <= text.length()) {
759                            char item = '\0';
760    
761                            for (int i = 0; i < tag.length; i++) {
762                                    item = text.charAt(pos++);
763    
764                                    if (Character.toLowerCase(item) != tag[i]) {
765                                            return false;
766                                    }
767                            }
768    
769                            item = text.charAt(pos);
770    
771                            // Check that char after tag is not a letter (i.e. another tag)
772    
773                            return !Character.isLetter(item);
774                    }
775                    else {
776                            return false;
777                    }
778            }
779    
780            protected int stripTag(char[] tag, String text, int pos) {
781                    int x = pos + _TAG_SCRIPT.length;
782    
783                    // Find end of the tag
784    
785                    x = text.indexOf(">", x);
786    
787                    if (x < 0) {
788                            return pos;
789                    }
790    
791                    // Check if preceding character is / (i.e. is this instance of <abc/>)
792    
793                    if (text.charAt(x-1) == '/') {
794                            return pos;
795                    }
796    
797                    // Search for the ending </abc> tag
798    
799                    while (true) {
800                            x = text.indexOf("</", x);
801    
802                            if (x >= 0) {
803                                    if (isTag(tag, text, x + 2)) {
804                                            pos = x;
805    
806                                            break;
807                                    }
808                                    else {
809    
810                                            // Skip past "</"
811    
812                                            x += 2;
813                                    }
814                            }
815                            else {
816                                    break;
817                            }
818                    }
819    
820                    return pos;
821            }
822    
823            private static final String[] _MS_WORD_HTML = new String[] {
824                    "&reg;", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
825            };
826    
827            private static final String[] _MS_WORD_UNICODE = new String[] {
828                    "\u00ae", "\u2019", "\u201c", "\u201d"
829            };
830    
831            private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
832    
833            private static final char[] _TAG_STYLE = {'s', 't', 'y', 'l', 'e'};
834    
835            // See http://www.w3.org/TR/xpath20/#lexical-structure
836    
837            private static final char[] _XPATH_TOKENS = {
838                    '(', ')', '[', ']', '.', '@', ',', ':', '/', '|', '+', '-', '=', '!',
839                    '<', '>', '*', '$', '"', '"', ' ', 9, 10, 13, 133, 8232
840            };
841    
842            private static final Map<String, String> _unescapeMap = new HashMap<>();
843    
844            static {
845                    _unescapeMap.put("lt", "<");
846                    _unescapeMap.put("gt", ">");
847                    _unescapeMap.put("amp", "&");
848                    _unescapeMap.put("rsquo", "\u2019");
849                    _unescapeMap.put("#034", "\"");
850                    _unescapeMap.put("#039", "'");
851                    _unescapeMap.put("#040", "(");
852                    _unescapeMap.put("#041", ")");
853                    _unescapeMap.put("#044", ",");
854                    _unescapeMap.put("#035", "#");
855                    _unescapeMap.put("#037", "%");
856                    _unescapeMap.put("#059", ";");
857                    _unescapeMap.put("#061", "=");
858                    _unescapeMap.put("#043", "+");
859                    _unescapeMap.put("#045", "-");
860            }
861    
862            private final Pattern _pattern = Pattern.compile("([\\s<&]|$)");
863    
864    }