001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.security.pacl.DoPrivileged;
018 import com.liferay.portal.kernel.util.CharPool;
019 import com.liferay.portal.kernel.util.Html;
020 import com.liferay.portal.kernel.util.HttpUtil;
021 import com.liferay.portal.kernel.util.StringBundler;
022 import com.liferay.portal.kernel.util.StringPool;
023 import com.liferay.portal.kernel.util.StringUtil;
024 import com.liferay.portal.kernel.util.Validator;
025
026 import java.util.HashMap;
027 import java.util.Map;
028 import java.util.regex.Matcher;
029 import java.util.regex.Pattern;
030
031 import net.htmlparser.jericho.Renderer;
032 import net.htmlparser.jericho.Source;
033 import net.htmlparser.jericho.TextExtractor;
034
035
049 @DoPrivileged
050 public class HtmlImpl implements Html {
051
052 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
053
054 public static final int ESCAPE_MODE_CSS = 2;
055
056 public static final int ESCAPE_MODE_JS = 3;
057
058 public static final int ESCAPE_MODE_TEXT = 4;
059
060 public static final int ESCAPE_MODE_URL = 5;
061
062
069 @Override
070 public String escape(String text) {
071 if (text == null) {
072 return null;
073 }
074
075 if (text.length() == 0) {
076 return StringPool.BLANK;
077 }
078
079
080
081
082
083 StringBundler sb = null;
084
085 int lastReplacementIndex = 0;
086
087 for (int i = 0; i < text.length(); i++) {
088 char c = text.charAt(i);
089
090 String replacement = null;
091
092 switch (c) {
093 case '<':
094 replacement = "<";
095
096 break;
097
098 case '>':
099 replacement = ">";
100
101 break;
102
103 case '&':
104 replacement = "&";
105
106 break;
107
108 case '"':
109 replacement = """;
110
111 break;
112
113 case '\'':
114 replacement = "'";
115
116 break;
117
118 case '\u00bb':
119 replacement = "»";
120
121 break;
122
123 case '\u2013':
124 replacement = "–";
125
126 break;
127
128 case '\u2014':
129 replacement = "—";
130
131 break;
132 }
133
134 if (replacement != null) {
135 if (sb == null) {
136 sb = new StringBundler();
137 }
138
139 if (i > lastReplacementIndex) {
140 sb.append(text.substring(lastReplacementIndex, i));
141 }
142
143 sb.append(replacement);
144
145 lastReplacementIndex = i + 1;
146 }
147 }
148
149 if (sb == null) {
150 return text;
151 }
152
153 if (lastReplacementIndex < text.length()) {
154 sb.append(text.substring(lastReplacementIndex));
155 }
156
157 return sb.toString();
158 }
159
160
176 @Override
177 public String escape(String text, int mode) {
178 if (text == null) {
179 return null;
180 }
181
182 if (text.length() == 0) {
183 return StringPool.BLANK;
184 }
185
186 String prefix = StringPool.BLANK;
187 String postfix = StringPool.BLANK;
188
189 if (mode == ESCAPE_MODE_ATTRIBUTE) {
190 prefix = "&#x";
191 postfix = StringPool.SEMICOLON;
192 }
193 else if (mode == ESCAPE_MODE_CSS) {
194 prefix = StringPool.BACK_SLASH;
195 }
196 else if (mode == ESCAPE_MODE_JS) {
197 prefix = "\\x";
198 }
199 else if (mode == ESCAPE_MODE_URL) {
200 return HttpUtil.encodeURL(text, true);
201 }
202 else {
203 return escape(text);
204 }
205
206 StringBuilder sb = new StringBuilder();
207
208 for (int i = 0; i < text.length(); i++) {
209 char c = text.charAt(i);
210
211 if ((c > 255) || Character.isLetterOrDigit(c) ||
212 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
213
214 sb.append(c);
215 }
216 else {
217 sb.append(prefix);
218
219 String hexString = StringUtil.toHexString(c);
220
221 if (hexString.length() == 1) {
222 sb.append(StringPool.ASCII_TABLE[48]);
223 }
224
225 sb.append(hexString);
226 sb.append(postfix);
227 }
228 }
229
230 if (sb.length() == text.length()) {
231 return text;
232 }
233 else {
234 return sb.toString();
235 }
236 }
237
238
246 @Override
247 public String escapeAttribute(String attribute) {
248 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
249 }
250
251
258 @Override
259 public String escapeCSS(String css) {
260 return escape(css, ESCAPE_MODE_CSS);
261 }
262
263
271 @Override
272 public String escapeHREF(String href) {
273 if (href == null) {
274 return null;
275 }
276
277 if (href.length() == 0) {
278 return StringPool.BLANK;
279 }
280
281 if (href.indexOf(StringPool.COLON) == 10) {
282 String protocol = StringUtil.toLowerCase(href.substring(0, 10));
283
284 if (protocol.equals("javascript")) {
285 href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
286 }
287 }
288
289 return escapeAttribute(href);
290 }
291
292
300 @Override
301 public String escapeJS(String js) {
302 return escape(js, ESCAPE_MODE_JS);
303 }
304
305 @Override
306 public String escapeJSLink(String link) {
307 if (Validator.isNull(link)) {
308 return StringPool.BLANK;
309 }
310
311 if (link.indexOf(StringPool.COLON) == 10) {
312 String protocol = StringUtil.toLowerCase(link.substring(0, 10));
313
314 if (protocol.equals("javascript")) {
315 link = StringUtil.replaceFirst(link, StringPool.COLON, "%3a");
316 }
317 }
318
319 return link;
320 }
321
322
329 @Override
330 public String escapeURL(String url) {
331 return escape(url, ESCAPE_MODE_URL);
332 }
333
334 @Override
335 public String escapeXPath(String xPath) {
336 if (Validator.isNull(xPath)) {
337 return xPath;
338 }
339
340 StringBuilder sb = new StringBuilder(xPath.length());
341
342 for (int i = 0; i < xPath.length(); i++) {
343 char c = xPath.charAt(i);
344
345 boolean hasToken = false;
346
347 for (int j = 0; j < _XPATH_TOKENS.length; j++) {
348 if (c == _XPATH_TOKENS[j]) {
349 hasToken = true;
350
351 break;
352 }
353 }
354
355 if (hasToken) {
356 sb.append(StringPool.UNDERLINE);
357 }
358 else {
359 sb.append(c);
360 }
361 }
362
363 return sb.toString();
364 }
365
366 @Override
367 public String escapeXPathAttribute(String xPathAttribute) {
368 boolean hasApostrophe = xPathAttribute.contains(StringPool.APOSTROPHE);
369 boolean hasQuote = xPathAttribute.contains(StringPool.QUOTE);
370
371 if (hasQuote && hasApostrophe) {
372 String[] parts = xPathAttribute.split(StringPool.APOSTROPHE);
373
374 return "concat('".concat(
375 StringUtil.merge(parts, "', \"'\", '")).concat("')");
376 }
377
378 if (hasQuote) {
379 return StringPool.APOSTROPHE.concat(xPathAttribute).concat(
380 StringPool.APOSTROPHE);
381 }
382
383 return StringPool.QUOTE.concat(xPathAttribute).concat(StringPool.QUOTE);
384 }
385
386
399 @Override
400 public String extractText(String html) {
401 if (html == null) {
402 return null;
403 }
404
405 Source source = new Source(html);
406
407 TextExtractor textExtractor = source.getTextExtractor();
408
409 return textExtractor.toString();
410 }
411
412 @Override
413 public String fromInputSafe(String text) {
414 return StringUtil.replace(text, "&", "&");
415 }
416
417 @Override
418 public String getAUICompatibleId(String text) {
419 if (Validator.isNull(text)) {
420 return text;
421 }
422
423 StringBundler sb = null;
424
425 int lastReplacementIndex = 0;
426
427 for (int i = 0; i < text.length(); i++) {
428 char c = text.charAt(i);
429
430 if (((c <= 127) && (Validator.isChar(c) || Validator.isDigit(c))) ||
431 ((c > 127) && (c != CharPool.FIGURE_SPACE) &&
432 (c != CharPool.NARROW_NO_BREAK_SPACE) &&
433 (c != CharPool.NO_BREAK_SPACE))) {
434
435 continue;
436 }
437
438 if (sb == null) {
439 sb = new StringBundler();
440 }
441
442 if (i > lastReplacementIndex) {
443 sb.append(text.substring(lastReplacementIndex, i));
444 }
445
446 sb.append(CharPool.UNDERLINE);
447
448 if (c != CharPool.UNDERLINE) {
449 sb.append(StringUtil.toHexString(c));
450 }
451
452 sb.append(CharPool.UNDERLINE);
453
454 lastReplacementIndex = i + 1;
455 }
456
457 if (sb == null) {
458 return text;
459 }
460
461 if (lastReplacementIndex < text.length()) {
462 sb.append(text.substring(lastReplacementIndex));
463 }
464
465 return sb.toString();
466 }
467
468
484 @Override
485 public String render(String html) {
486 if (html == null) {
487 return null;
488 }
489
490 Source source = new Source(html);
491
492 Renderer renderer = source.getRenderer();
493
494 return renderer.toString();
495 }
496
497
506 @Deprecated
507 @Override
508 public String replaceMsWordCharacters(String text) {
509 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
510 }
511
512
520 @Override
521 public String replaceNewLine(String html) {
522 if (html == null) {
523 return null;
524 }
525
526 html = StringUtil.replace(html, StringPool.RETURN_NEW_LINE, "<br />");
527
528 return StringUtil.replace(html, StringPool.NEW_LINE, "<br />");
529 }
530
531
547 @Override
548 public String stripBetween(String text, String tag) {
549 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
550 }
551
552
559 @Override
560 public String stripComments(String text) {
561 return StringUtil.stripBetween(text, "<!--", "-->");
562 }
563
564 @Override
565 public String stripHtml(String text) {
566 if (text == null) {
567 return null;
568 }
569
570 text = stripComments(text);
571
572 StringBuilder sb = new StringBuilder(text.length());
573
574 int x = 0;
575 int y = text.indexOf("<");
576
577 while (y != -1) {
578 sb.append(text.substring(x, y));
579 sb.append(StringPool.SPACE);
580
581
582
583 if (isTag(_TAG_SCRIPT, text, y + 1)) {
584 y = stripTag(_TAG_SCRIPT, text, y);
585 }
586 else if (isTag(_TAG_STYLE, text, y + 1)) {
587 y = stripTag(_TAG_STYLE, text, y);
588 }
589
590 x = text.indexOf(">", y);
591
592 if (x == -1) {
593 break;
594 }
595
596 x++;
597
598 if (x < y) {
599
600
601
602 break;
603 }
604
605 y = text.indexOf("<", x);
606 }
607
608 if (y == -1) {
609 sb.append(text.substring(x));
610 }
611
612 return sb.toString();
613 }
614
615
627 @Override
628 public String toInputSafe(String text) {
629 return StringUtil.replace(
630 text,
631 new String[] {"&", "\""},
632 new String[] {"&", """});
633 }
634
635 @Override
636 public String unescape(String text) {
637 return StringUtil.replace(text, "&", ";", _unescapeMap);
638 }
639
640 @Override
641 public String unescapeCDATA(String text) {
642 if (text == null) {
643 return null;
644 }
645
646 if (text.length() == 0) {
647 return StringPool.BLANK;
648 }
649
650 text = StringUtil.replace(text, "<![CDATA[", "<![CDATA[");
651 text = StringUtil.replace(text, "]]>", "]]>");
652
653 return text;
654 }
655
656 @Override
657 public String wordBreak(String text, int columns) {
658 StringBundler sb = new StringBundler();
659
660 int length = 0;
661 int lastWrite = 0;
662 int pos = 0;
663
664 Matcher matcher = _pattern.matcher(text);
665
666 while (matcher.find()) {
667 if (matcher.start() < pos) {
668 continue;
669 }
670
671 while ((length + matcher.start() - pos) >= columns) {
672 pos += columns - length;
673
674 sb.append(text.substring(lastWrite, pos));
675 sb.append("<wbr/>­");
676
677 length = 0;
678 lastWrite = pos;
679 }
680
681 length += matcher.start() - pos;
682
683 String group = matcher.group();
684
685 if (group.equals(StringPool.AMPERSAND)) {
686 int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
687
688 if (x != -1) {
689 length++;
690 pos = x + 1;
691 }
692
693 continue;
694 }
695
696 if (group.equals(StringPool.LESS_THAN)) {
697 int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
698
699 if (x != -1) {
700 pos = x + 1;
701 }
702
703 continue;
704 }
705
706 if (group.equals(StringPool.SPACE) ||
707 group.equals(StringPool.NEW_LINE)) {
708
709 length = 0;
710 pos = matcher.start() + 1;
711 }
712 }
713
714 sb.append(text.substring(lastWrite));
715
716 return sb.toString();
717 }
718
719 protected boolean isTag(char[] tag, String text, int pos) {
720 if ((pos + tag.length + 1) <= text.length()) {
721 char item;
722
723 for (int i = 0; i < tag.length; i++) {
724 item = text.charAt(pos++);
725
726 if (Character.toLowerCase(item) != tag[i]) {
727 return false;
728 }
729 }
730
731 item = text.charAt(pos);
732
733
734
735 return !Character.isLetter(item);
736 }
737 else {
738 return false;
739 }
740 }
741
742 protected int stripTag(char[] tag, String text, int pos) {
743 int x = pos + _TAG_SCRIPT.length;
744
745
746
747 x = text.indexOf(">", x);
748
749 if (x < 0) {
750 return pos;
751 }
752
753
754
755 if (text.charAt(x-1) == '/') {
756 return pos;
757 }
758
759
760
761 while (true) {
762 x = text.indexOf("</", x);
763
764 if (x >= 0) {
765 if (isTag(tag, text, x + 2)) {
766 pos = x;
767
768 break;
769 }
770 else {
771
772
773
774 x += 2;
775 }
776 }
777 else {
778 break;
779 }
780 }
781
782 return pos;
783 }
784
785 private static final String[] _MS_WORD_HTML = new String[] {
786 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
787 };
788
789 private static final String[] _MS_WORD_UNICODE = new String[] {
790 "\u00ae", "\u2019", "\u201c", "\u201d"
791 };
792
793 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
794
795 private static final char[] _TAG_STYLE = {'s', 't', 'y', 'l', 'e'};
796
797
798
799 private static final char[] _XPATH_TOKENS = {
800 '(', ')', '[', ']', '.', '@', ',', ':', '/', '|', '+', '-', '=', '!',
801 '<', '>', '*', '$', '"', '"', ' ', 9, 10, 13, 133, 8232};
802
/**
 * Maps the name of a character reference, without the leading ampersand
 * and the trailing semicolon, to the character it stands for. Used by
 * <code>unescape</code>.
 */
private static final Map<String, String> _unescapeMap =
    new HashMap<String, String>();

static {

    // Each row holds a reference name and its replacement

    String[][] references = {
        {"lt", "<"}, {"gt", ">"}, {"amp", "&"}, {"rsquo", "\u2019"},
        {"#034", "\""}, {"#039", "'"}, {"#040", "("}, {"#041", ")"},
        {"#044", ","}, {"#035", "#"}, {"#037", "%"}, {"#059", ";"},
        {"#061", "="}, {"#043", "+"}, {"#045", "-"}
    };

    for (String[] reference : references) {
        _unescapeMap.put(reference[0], reference[1]);
    }
}
823
824 private Pattern _pattern = Pattern.compile("([\\s<&]|$)");
825
826 }