001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * The contents of this file are subject to the terms of the Liferay Enterprise
005     * Subscription License ("License"). You may not use this file except in
006     * compliance with the License. You can obtain a copy of the License by
007     * contacting Liferay, Inc. See the License for the specific language governing
008     * permissions and limitations under the License, including but not limited to
009     * distribution rights of the Software.
010     *
011     *
012     *
013     */
014    
015    package com.liferay.portal.util;
016    
017    import com.liferay.portal.kernel.security.pacl.DoPrivileged;
018    import com.liferay.portal.kernel.util.CharPool;
019    import com.liferay.portal.kernel.util.Html;
020    import com.liferay.portal.kernel.util.HttpUtil;
021    import com.liferay.portal.kernel.util.StringBundler;
022    import com.liferay.portal.kernel.util.StringPool;
023    import com.liferay.portal.kernel.util.StringUtil;
024    import com.liferay.portal.kernel.util.Validator;
025    
026    import java.util.regex.Matcher;
027    import java.util.regex.Pattern;
028    
029    import net.htmlparser.jericho.Renderer;
030    import net.htmlparser.jericho.Source;
031    import net.htmlparser.jericho.TextExtractor;
032    
033    /**
034     * @author Brian Wing Shun Chan
035     * @author Clarence Shen
036     * @author Harry Mark
037     * @author Samuel Kong
038     * @author Connor McKay
039     * @author Shuyang Zhou
040     */
041    @DoPrivileged
042    public class HtmlImpl implements Html {
043    
044            public static final int ESCAPE_MODE_ATTRIBUTE = 1;
045    
046            public static final int ESCAPE_MODE_CSS = 2;
047    
048            public static final int ESCAPE_MODE_JS = 3;
049    
050            public static final int ESCAPE_MODE_TEXT = 4;
051    
052            public static final int ESCAPE_MODE_URL = 5;
053    
054            /**
055             * Escapes the text so that it is safe to use in an HTML context.
056             *
057             * @param  text the text to escape
058             * @return the escaped HTML text, or <code>null</code> if the text is
059             *         <code>null</code>
060             */
061            @Override
062            public String escape(String text) {
063                    if (text == null) {
064                            return null;
065                    }
066    
067                    if (text.length() == 0) {
068                            return StringPool.BLANK;
069                    }
070    
071                    // Escape using XSS recommendations from
072                    // http://www.owasp.org/index.php/Cross_Site_Scripting
073                    // #How_to_Protect_Yourself
074    
075                    StringBundler sb = null;
076    
077                    int lastReplacementIndex = 0;
078    
079                    for (int i = 0; i < text.length(); i++) {
080                            char c = text.charAt(i);
081    
082                            String replacement = null;
083    
084                            switch (c) {
085                                    case '<':
086                                            replacement = "&lt;";
087    
088                                            break;
089    
090                                    case '>':
091                                            replacement = "&gt;";
092    
093                                            break;
094    
095                                    case '&':
096                                            replacement = "&amp;";
097    
098                                            break;
099    
100                                    case '"':
101                                            replacement = "&#034;";
102    
103                                            break;
104    
105                                    case '\'':
106                                            replacement = "&#039;";
107    
108                                            break;
109    
110                                    case '\u00bb': // '???'
111                                            replacement = "&#187;";
112    
113                                            break;
114    
115                                    case '\u2013':
116                                            replacement = "&#x2013;";
117    
118                                            break;
119    
120                                    case '\u2014':
121                                            replacement = "&#x2014;";
122    
123                                            break;
124                            }
125    
126                            if (replacement != null) {
127                                    if (sb == null) {
128                                            sb = new StringBundler();
129                                    }
130    
131                                    if (i > lastReplacementIndex) {
132                                            sb.append(text.substring(lastReplacementIndex, i));
133                                    }
134    
135                                    sb.append(replacement);
136    
137                                    lastReplacementIndex = i + 1;
138                            }
139                    }
140    
141                    if (sb == null) {
142                            return text;
143                    }
144    
145                    if (lastReplacementIndex < text.length()) {
146                            sb.append(text.substring(lastReplacementIndex));
147                    }
148    
149                    return sb.toString();
150            }
151    
152            @Override
153            public String escape(String text, int type) {
154                    if (text == null) {
155                            return null;
156                    }
157    
158                    if (text.length() == 0) {
159                            return StringPool.BLANK;
160                    }
161    
162                    String prefix = StringPool.BLANK;
163                    String postfix = StringPool.BLANK;
164    
165                    if (type == ESCAPE_MODE_ATTRIBUTE) {
166                            prefix = "&#x";
167                            postfix = StringPool.SEMICOLON;
168                    }
169                    else if (type == ESCAPE_MODE_CSS) {
170                            prefix = StringPool.BACK_SLASH;
171                    }
172                    else if (type == ESCAPE_MODE_JS) {
173                            prefix = "\\x";
174                    }
175                    else if (type == ESCAPE_MODE_URL) {
176                            return HttpUtil.encodeURL(text, true);
177                    }
178                    else {
179                            return escape(text);
180                    }
181    
182                    StringBuilder sb = new StringBuilder();
183    
184                    for (int i = 0; i < text.length(); i++) {
185                            char c = text.charAt(i);
186    
187                            if ((c > 255) || Character.isLetterOrDigit(c) ||
188                                    (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
189    
190                                    sb.append(c);
191                            }
192                            else {
193                                    sb.append(prefix);
194    
195                                    String hexString = StringUtil.toHexString(c);
196    
197                                    if (hexString.length() == 1) {
198                                            sb.append(StringPool.ASCII_TABLE[48]);
199                                    }
200    
201                                    sb.append(hexString);
202                                    sb.append(postfix);
203                            }
204                    }
205    
206                    if (sb.length() == text.length()) {
207                            return text;
208                    }
209                    else {
210                            return sb.toString();
211                    }
212            }
213    
214            @Override
215            public String escapeAttribute(String attribute) {
216                    return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
217            }
218    
219            @Override
220            public String escapeCSS(String css) {
221                    return escape(css, ESCAPE_MODE_CSS);
222            }
223    
224            @Override
225            public String escapeHREF(String href) {
226                    if (href == null) {
227                            return null;
228                    }
229    
230                    if (href.length() == 0) {
231                            return StringPool.BLANK;
232                    }
233    
234                    int index = href.indexOf(StringPool.COLON);
235    
236                    if (index == 4) {
237                            String protocol = StringUtil.toLowerCase(href.substring(0, 4));
238    
239                            if (protocol.equals("data")) {
240                                    href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
241                            }
242                    }
243                    else if (index == 10) {
244                            String protocol = StringUtil.toLowerCase(href.substring(0, 10));
245    
246                            if (protocol.equals("javascript")) {
247                                    href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
248                            }
249                    }
250    
251                    return escapeAttribute(href);
252            }
253    
254            @Override
255            public String escapeJS(String js) {
256                    return escape(js, ESCAPE_MODE_JS);
257            }
258    
259            @Override
260            public String escapeURL(String url) {
261                    return escape(url, ESCAPE_MODE_URL);
262            }
263    
264            @Override
265            public String escapeXPath(String xPath) {
266                    if (Validator.isNull(xPath)) {
267                            return xPath;
268                    }
269    
270                    StringBuilder sb = new StringBuilder(xPath.length());
271    
272                    for (int i = 0; i < xPath.length(); i++) {
273                            char c = xPath.charAt(i);
274    
275                            boolean hasToken = false;
276    
277                            for (int j = 0; j < _XPATH_TOKENS.length; j++) {
278                                    if (c == _XPATH_TOKENS[j]) {
279                                            hasToken = true;
280    
281                                            break;
282                                    }
283                            }
284    
285                            if (hasToken) {
286                                    sb.append(StringPool.UNDERLINE);
287                            }
288                            else {
289                                    sb.append(c);
290                            }
291                    }
292    
293                    return sb.toString();
294            }
295    
296            @Override
297            public String escapeXPathAttribute(String xPathAttribute) {
298                    boolean hasApostrophe = xPathAttribute.contains(StringPool.APOSTROPHE);
299                    boolean hasQuote = xPathAttribute.contains(StringPool.QUOTE);
300    
301                    if (hasQuote && hasApostrophe) {
302                            String[] parts = xPathAttribute.split(StringPool.APOSTROPHE);
303    
304                            return "concat('".concat(
305                                    StringUtil.merge(parts, "', \"'\", '")).concat("')");
306                    }
307    
308                    if (hasQuote) {
309                            return StringPool.APOSTROPHE.concat(xPathAttribute).concat(
310                                    StringPool.APOSTROPHE);
311                    }
312    
313                    return StringPool.QUOTE.concat(xPathAttribute).concat(StringPool.QUOTE);
314            }
315    
316            @Override
317            public String extractText(String html) {
318                    if (html == null) {
319                            return null;
320                    }
321    
322                    Source source = new Source(html);
323    
324                    TextExtractor textExtractor = source.getTextExtractor();
325    
326                    return textExtractor.toString();
327            }
328    
329            @Override
330            public String fromInputSafe(String text) {
331                    return StringUtil.replace(text, "&amp;", "&");
332            }
333    
334            @Override
335            public String getAUICompatibleId(String text) {
336                    if (Validator.isNull(text)) {
337                            return text;
338                    }
339    
340                    StringBundler sb = null;
341    
342                    int lastReplacementIndex = 0;
343    
344                    for (int i = 0; i < text.length(); i++) {
345                            char c = text.charAt(i);
346    
347                            if (((c <= 127) && (Validator.isChar(c) || Validator.isDigit(c))) ||
348                                    ((c > 127) && (c != CharPool.FIGURE_SPACE) &&
349                                     (c != CharPool.NARROW_NO_BREAK_SPACE) &&
350                                     (c != CharPool.NO_BREAK_SPACE))) {
351    
352                                    continue;
353                            }
354    
355                            if (sb == null) {
356                                    sb = new StringBundler();
357                            }
358    
359                            if (i > lastReplacementIndex) {
360                                    sb.append(text.substring(lastReplacementIndex, i));
361                            }
362    
363                            sb.append(CharPool.UNDERLINE);
364    
365                            if (c != CharPool.UNDERLINE) {
366                                    sb.append(StringUtil.toHexString(c));
367                            }
368    
369                            sb.append(CharPool.UNDERLINE);
370    
371                            lastReplacementIndex = i + 1;
372                    }
373    
374                    if (sb == null) {
375                            return text;
376                    }
377    
378                    if (lastReplacementIndex < text.length()) {
379                            sb.append(text.substring(lastReplacementIndex));
380                    }
381    
382                    return sb.toString();
383            }
384    
385            @Deprecated
386            @Override
387            public String render(String html) {
388                    if (html == null) {
389                            return null;
390                    }
391    
392                    Source source = new Source(html);
393    
394                    Renderer renderer = source.getRenderer();
395    
396                    return renderer.toString();
397            }
398    
399            @Override
400            public String replaceMsWordCharacters(String text) {
401                    return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
402            }
403    
404            @Override
405            public String replaceNewLine(String text) {
406                    if (text == null) {
407                            return null;
408                    }
409    
410                    return text.replaceAll("\r?\n", "<br />");
411            }
412    
413            @Override
414            public String stripBetween(String text, String tag) {
415                    return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
416            }
417    
418            @Override
419            public String stripComments(String text) {
420                    return StringUtil.stripBetween(text, "<!--", "-->");
421            }
422    
423            @Override
424            public String stripHtml(String text) {
425                    if (text == null) {
426                            return null;
427                    }
428    
429                    text = stripComments(text);
430    
431                    StringBuilder sb = new StringBuilder(text.length());
432    
433                    int x = 0;
434                    int y = text.indexOf("<");
435    
436                    while (y != -1) {
437                            sb.append(text.substring(x, y));
438                            sb.append(StringPool.SPACE);
439    
440                            // Look for text enclosed by <abc></abc>
441    
442                            if (isTag(_TAG_SCRIPT, text, y + 1)) {
443                                    y = stripTag(_TAG_SCRIPT, text, y);
444                            }
445                            else if (isTag(_TAG_STYLE, text, y + 1)) {
446                                    y = stripTag(_TAG_STYLE, text, y);
447                            }
448    
449                            x = text.indexOf(">", y);
450    
451                            if (x == -1) {
452                                    break;
453                            }
454    
455                            x++;
456    
457                            if (x < y) {
458    
459                                    // <b>Hello</b
460    
461                                    break;
462                            }
463    
464                            y = text.indexOf("<", x);
465                    }
466    
467                    if (y == -1) {
468                            sb.append(text.substring(x));
469                    }
470    
471                    return sb.toString();
472            }
473    
474            @Override
475            public String toInputSafe(String text) {
476                    return StringUtil.replace(
477                            text,
478                            new String[] {"&", "\""},
479                            new String[] {"&amp;", "&quot;"});
480            }
481    
482            @Override
483            public String unescape(String text) {
484                    if (text == null) {
485                            return null;
486                    }
487    
488                    if (text.length() == 0) {
489                            return StringPool.BLANK;
490                    }
491    
492                    // Optimize this
493    
494                    text = StringUtil.replace(text, "&lt;", "<");
495                    text = StringUtil.replace(text, "&gt;", ">");
496                    text = StringUtil.replace(text, "&amp;", "&");
497                    text = StringUtil.replace(text, "&rsquo;", "\u2019");
498                    text = StringUtil.replace(text, "&#034;", "\"");
499                    text = StringUtil.replace(text, "&#039;", "'");
500                    text = StringUtil.replace(text, "&#040;", "(");
501                    text = StringUtil.replace(text, "&#041;", ")");
502                    text = StringUtil.replace(text, "&#044;", ",");
503                    text = StringUtil.replace(text, "&#035;", "#");
504                    text = StringUtil.replace(text, "&#037;", "%");
505                    text = StringUtil.replace(text, "&#059;", ";");
506                    text = StringUtil.replace(text, "&#061;", "=");
507                    text = StringUtil.replace(text, "&#043;", "+");
508                    text = StringUtil.replace(text, "&#045;", "-");
509    
510                    return text;
511            }
512    
513            @Override
514            public String unescapeCDATA(String text) {
515                    if (text == null) {
516                            return null;
517                    }
518    
519                    if (text.length() == 0) {
520                            return StringPool.BLANK;
521                    }
522    
523                    text = StringUtil.replace(text, "&lt;![CDATA[", "<![CDATA[");
524                    text = StringUtil.replace(text, "]]&gt;", "]]>");
525    
526                    return text;
527            }
528    
529            @Override
530            public String wordBreak(String text, int columns) {
531                    StringBundler sb = new StringBundler();
532    
533                    int length = 0;
534                    int lastWrite = 0;
535                    int pos = 0;
536    
537                    Pattern pattern = Pattern.compile("([\\s<&]|$)");
538    
539                    Matcher matcher = pattern.matcher(text);
540    
541                    while (matcher.find()) {
542                            if (matcher.start() < pos) {
543                                    continue;
544                            }
545    
546                            while ((length + matcher.start() - pos) >= columns) {
547                                    pos += columns - length;
548    
549                                    sb.append(text.substring(lastWrite, pos));
550                                    sb.append("<wbr/>&shy;");
551    
552                                    length = 0;
553                                    lastWrite = pos;
554                            }
555    
556                            length += matcher.start() - pos;
557    
558                            String group = matcher.group();
559    
560                            if (group.equals(StringPool.AMPERSAND)) {
561                                    int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
562    
563                                    if (x != -1) {
564                                            length++;
565                                            pos = x + 1;
566                                    }
567    
568                                    continue;
569                            }
570    
571                            if (group.equals(StringPool.LESS_THAN)) {
572                                    int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
573    
574                                    if (x != -1) {
575                                            pos = x + 1;
576                                    }
577    
578                                    continue;
579                            }
580    
581                            if (group.equals(StringPool.SPACE) ||
582                                    group.equals(StringPool.NEW_LINE)) {
583    
584                                    length = 0;
585                                    pos = matcher.start() + 1;
586                            }
587                    }
588    
589                    sb.append(text.substring(lastWrite));
590    
591                    return sb.toString();
592            }
593    
594            protected boolean isTag(char[] tag, String text, int pos) {
595                    if ((pos + tag.length + 1) <= text.length()) {
596                            char item;
597    
598                            for (int i = 0; i < tag.length; i++) {
599                                    item = text.charAt(pos++);
600    
601                                    if (Character.toLowerCase(item) != tag[i]) {
602                                            return false;
603                                    }
604                            }
605    
606                            item = text.charAt(pos);
607    
608                            // Check that char after tag is not a letter (i.e. another tag)
609    
610                            return !Character.isLetter(item);
611                    }
612                    else {
613                            return false;
614                    }
615            }
616    
617            protected int stripTag(char[] tag, String text, int pos) {
618                    int x = pos + _TAG_SCRIPT.length;
619    
620                    // Find end of the tag
621    
622                    x = text.indexOf(">", x);
623    
624                    if (x < 0) {
625                            return pos;
626                    }
627    
628                    // Check if preceding character is / (i.e. is this instance of <abc/>)
629    
630                    if (text.charAt(x-1) == '/') {
631                            return pos;
632                    }
633    
634                    // Search for the ending </abc> tag
635    
636                    while (true) {
637                            x = text.indexOf("</", x);
638    
639                            if (x >= 0) {
640                                    if (isTag(tag, text, x + 2)) {
641                                            pos = x;
642    
643                                            break;
644                                    }
645                                    else {
646    
647                                            // Skip past "</"
648    
649                                            x += 2;
650                                    }
651                            }
652                            else {
653                                    break;
654                            }
655                    }
656    
657                    return pos;
658            }
659    
660            private static final String[] _MS_WORD_HTML = new String[] {
661                    "&reg;", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
662            };
663    
664            private static final String[] _MS_WORD_UNICODE = new String[] {
665                    "\u00ae", "\u2019", "\u201c", "\u201d"
666            };
667    
668            private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
669    
670            private static final char[] _TAG_STYLE = {'s', 't', 'y', 'l', 'e'};
671    
672            // See http://www.w3.org/TR/xpath20/#lexical-structure
673    
674            private static final char[] _XPATH_TOKENS = {
675                    '(', ')', '[', ']', '.', '@', ',', ':', '/', '|', '+', '-', '=', '!',
676                    '<', '>', '*', '$', '"', '"', ' ', 9, 10, 13, 133, 8232};
677    
678    }