001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.security.pacl.DoPrivileged;
018 import com.liferay.portal.kernel.util.CharPool;
019 import com.liferay.portal.kernel.util.Html;
020 import com.liferay.portal.kernel.util.HttpUtil;
021 import com.liferay.portal.kernel.util.StringBundler;
022 import com.liferay.portal.kernel.util.StringPool;
023 import com.liferay.portal.kernel.util.StringUtil;
024 import com.liferay.portal.kernel.util.Validator;
025
026 import java.util.HashMap;
027 import java.util.Map;
028 import java.util.regex.Matcher;
029 import java.util.regex.Pattern;
030
031 import net.htmlparser.jericho.Renderer;
032 import net.htmlparser.jericho.Source;
033 import net.htmlparser.jericho.TextExtractor;
034
035
049 @DoPrivileged
050 public class HtmlImpl implements Html {
051
/**
 * Mode for {@link #escape(String, int)}: unsafe characters are written as
 * hexadecimal character references ({@code &#x..;}).
 */
public static final int ESCAPE_MODE_ATTRIBUTE = 1;

/**
 * Mode for {@link #escape(String, int)}: unsafe characters are written as
 * a backslash followed by their hexadecimal code.
 */
public static final int ESCAPE_MODE_CSS = 2;

/**
 * Mode for {@link #escape(String, int)}: unsafe characters are written as
 * {@code \x} followed by their hexadecimal code.
 */
public static final int ESCAPE_MODE_JS = 3;

/**
 * Mode for {@link #escape(String, int)}: the text is handed to
 * {@link #escape(String)}. Any mode value that is not one of the other
 * four constants is treated the same way.
 */
public static final int ESCAPE_MODE_TEXT = 4;

/**
 * Mode for {@link #escape(String, int)}: the text is passed to
 * <code>HttpUtil.encodeURL(text, true)</code>.
 */
public static final int ESCAPE_MODE_URL = 5;
061
062
072 @Override
073 public String buildData(Map<String, Object> data) {
074 if ((data == null) || data.isEmpty()) {
075 return StringPool.BLANK;
076 }
077
078 StringBundler sb = new StringBundler(data.size() * 5);
079
080 for (Map.Entry<String, Object> entry : data.entrySet()) {
081 sb.append("data-");
082 sb.append(entry.getKey());
083 sb.append("=\"");
084 sb.append(escapeAttribute(String.valueOf(entry.getValue())));
085 sb.append("\" ");
086 }
087
088 return sb.toString();
089 }
090
091
098 @Override
099 public String escape(String text) {
100 if (text == null) {
101 return null;
102 }
103
104 if (text.length() == 0) {
105 return StringPool.BLANK;
106 }
107
108
109
110
111
112 StringBundler sb = null;
113
114 int lastReplacementIndex = 0;
115
116 for (int i = 0; i < text.length(); i++) {
117 char c = text.charAt(i);
118
119 String replacement = null;
120
121 switch (c) {
122 case '<':
123 replacement = "<";
124
125 break;
126
127 case '>':
128 replacement = ">";
129
130 break;
131
132 case '&':
133 replacement = "&";
134
135 break;
136
137 case '"':
138 replacement = """;
139
140 break;
141
142 case '\'':
143 replacement = "'";
144
145 break;
146
147 case '\u00bb':
148 replacement = "»";
149
150 break;
151
152 case '\u2013':
153 replacement = "–";
154
155 break;
156
157 case '\u2014':
158 replacement = "—";
159
160 break;
161 }
162
163 if (replacement != null) {
164 if (sb == null) {
165 sb = new StringBundler();
166 }
167
168 if (i > lastReplacementIndex) {
169 sb.append(text.substring(lastReplacementIndex, i));
170 }
171
172 sb.append(replacement);
173
174 lastReplacementIndex = i + 1;
175 }
176 }
177
178 if (sb == null) {
179 return text;
180 }
181
182 if (lastReplacementIndex < text.length()) {
183 sb.append(text.substring(lastReplacementIndex));
184 }
185
186 return sb.toString();
187 }
188
189
205 @Override
206 public String escape(String text, int mode) {
207 if (text == null) {
208 return null;
209 }
210
211 if (text.length() == 0) {
212 return StringPool.BLANK;
213 }
214
215 String prefix = StringPool.BLANK;
216 String postfix = StringPool.BLANK;
217
218 if (mode == ESCAPE_MODE_ATTRIBUTE) {
219 prefix = "&#x";
220 postfix = StringPool.SEMICOLON;
221 }
222 else if (mode == ESCAPE_MODE_CSS) {
223 prefix = StringPool.BACK_SLASH;
224 }
225 else if (mode == ESCAPE_MODE_JS) {
226 prefix = "\\x";
227 }
228 else if (mode == ESCAPE_MODE_URL) {
229 return HttpUtil.encodeURL(text, true);
230 }
231 else {
232 return escape(text);
233 }
234
235 StringBuilder sb = new StringBuilder(text.length());
236
237 for (int i = 0; i < text.length(); i++) {
238 char c = text.charAt(i);
239
240 if ((c > 255) || (c == CharPool.DASH) ||
241 (c == CharPool.UNDERLINE) || Character.isLetterOrDigit(c)) {
242
243 sb.append(c);
244 }
245 else {
246 sb.append(prefix);
247
248 String hexString = StringUtil.toHexString(c);
249
250 if (hexString.length() == 1) {
251 sb.append(StringPool.ASCII_TABLE[48]);
252 }
253
254 sb.append(hexString);
255 sb.append(postfix);
256 }
257 }
258
259 if (sb.length() == text.length()) {
260 return text;
261 }
262
263 return sb.toString();
264 }
265
266
/**
 * Escapes the value so that it can be used inside an HTML attribute.
 * Delegates to {@link #escape(String, int)} with
 * {@link #ESCAPE_MODE_ATTRIBUTE}.
 *
 * @param  attribute the attribute value to escape
 * @return the escaped value, or <code>null</code> if the value is
 *         <code>null</code>
 */
@Override
public String escapeAttribute(String attribute) {
	return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
}
278
279
/**
 * Escapes the value so that it can be used inside CSS. Delegates to
 * {@link #escape(String, int)} with {@link #ESCAPE_MODE_CSS}.
 *
 * @param  css the CSS text to escape
 * @return the escaped text, or <code>null</code> if the text is
 *         <code>null</code>
 */
@Override
public String escapeCSS(String css) {
	return escape(css, ESCAPE_MODE_CSS);
}
290
291
299 @Override
300 public String escapeHREF(String href) {
301 if (href == null) {
302 return null;
303 }
304
305 if (href.length() == 0) {
306 return StringPool.BLANK;
307 }
308
309 int index = href.indexOf(StringPool.COLON);
310
311 if (index == 4) {
312 String protocol = StringUtil.toLowerCase(href.substring(0, 4));
313
314 if (protocol.equals("data")) {
315 href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
316 }
317 }
318 else if (index == 10) {
319 String protocol = StringUtil.toLowerCase(href.substring(0, 10));
320
321 if (protocol.equals("javascript")) {
322 href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
323 }
324 }
325
326 return escapeAttribute(href);
327 }
328
329
/**
 * Escapes the value so that it can be used inside JavaScript. Delegates to
 * {@link #escape(String, int)} with {@link #ESCAPE_MODE_JS}.
 *
 * @param  js the JavaScript text to escape
 * @return the escaped text, or <code>null</code> if the text is
 *         <code>null</code>
 */
@Override
public String escapeJS(String js) {
	return escape(js, ESCAPE_MODE_JS);
}
341
342 @Override
343 public String escapeJSLink(String link) {
344 if (Validator.isNull(link)) {
345 return StringPool.BLANK;
346 }
347
348 if (link.indexOf(StringPool.COLON) == 10) {
349 String protocol = StringUtil.toLowerCase(link.substring(0, 10));
350
351 if (protocol.equals("javascript")) {
352 link = StringUtil.replaceFirst(link, StringPool.COLON, "%3a");
353 }
354 }
355
356 return link;
357 }
358
359
/**
 * Escapes the value so that it can be used as part of a URL. Delegates to
 * {@link #escape(String, int)} with {@link #ESCAPE_MODE_URL}.
 *
 * @param  url the URL text to escape
 * @return the escaped text, or <code>null</code> if the text is
 *         <code>null</code>
 */
@Override
public String escapeURL(String url) {
	return escape(url, ESCAPE_MODE_URL);
}
370
371 @Override
372 public String escapeXPath(String xPath) {
373 if (Validator.isNull(xPath)) {
374 return xPath;
375 }
376
377 StringBuilder sb = new StringBuilder(xPath.length());
378
379 for (int i = 0; i < xPath.length(); i++) {
380 char c = xPath.charAt(i);
381
382 boolean hasToken = false;
383
384 for (int j = 0; j < _XPATH_TOKENS.length; j++) {
385 if (c == _XPATH_TOKENS[j]) {
386 hasToken = true;
387
388 break;
389 }
390 }
391
392 if (hasToken) {
393 sb.append(StringPool.UNDERLINE);
394 }
395 else {
396 sb.append(c);
397 }
398 }
399
400 return sb.toString();
401 }
402
403 @Override
404 public String escapeXPathAttribute(String xPathAttribute) {
405 boolean hasApostrophe = xPathAttribute.contains(StringPool.APOSTROPHE);
406 boolean hasQuote = xPathAttribute.contains(StringPool.QUOTE);
407
408 if (hasQuote && hasApostrophe) {
409 String[] parts = xPathAttribute.split(StringPool.APOSTROPHE);
410
411 return "concat('".concat(
412 StringUtil.merge(parts, "', \"'\", '")).concat("')");
413 }
414
415 if (hasQuote) {
416 return StringPool.APOSTROPHE.concat(xPathAttribute).concat(
417 StringPool.APOSTROPHE);
418 }
419
420 return StringPool.QUOTE.concat(xPathAttribute).concat(StringPool.QUOTE);
421 }
422
423
436 @Override
437 public String extractText(String html) {
438 if (html == null) {
439 return null;
440 }
441
442 Source source = new Source(html);
443
444 TextExtractor textExtractor = source.getTextExtractor();
445
446 return textExtractor.toString();
447 }
448
449 @Override
450 public String fromInputSafe(String text) {
451 return StringUtil.replace(text, "&", "&");
452 }
453
454 @Override
455 public String getAUICompatibleId(String text) {
456 if (Validator.isNull(text)) {
457 return text;
458 }
459
460 StringBundler sb = null;
461
462 int lastReplacementIndex = 0;
463
464 for (int i = 0; i < text.length(); i++) {
465 char c = text.charAt(i);
466
467 if (((c <= 127) && (Validator.isChar(c) || Validator.isDigit(c))) ||
468 ((c > 127) && (c != CharPool.FIGURE_SPACE) &&
469 (c != CharPool.NARROW_NO_BREAK_SPACE) &&
470 (c != CharPool.NO_BREAK_SPACE))) {
471
472 continue;
473 }
474
475 if (sb == null) {
476 sb = new StringBundler();
477 }
478
479 if (i > lastReplacementIndex) {
480 sb.append(text.substring(lastReplacementIndex, i));
481 }
482
483 sb.append(CharPool.UNDERLINE);
484
485 if (c != CharPool.UNDERLINE) {
486 sb.append(StringUtil.toHexString(c));
487 }
488
489 sb.append(CharPool.UNDERLINE);
490
491 lastReplacementIndex = i + 1;
492 }
493
494 if (sb == null) {
495 return text;
496 }
497
498 if (lastReplacementIndex < text.length()) {
499 sb.append(text.substring(lastReplacementIndex));
500 }
501
502 return sb.toString();
503 }
504
505
521 @Override
522 public String render(String html) {
523 if (html == null) {
524 return null;
525 }
526
527 Source source = new Source(html);
528
529 Renderer renderer = source.getRenderer();
530
531 return renderer.toString();
532 }
533
534
/**
 * Replaces each string of <code>_MS_WORD_UNICODE</code> found in the text
 * with the string at the same position in <code>_MS_WORD_HTML</code>.
 *
 * @param      text the text to convert
 * @return     the converted text
 * @deprecated NOTE(review): the replacement API is not visible in this
 *             file; confirm and name it here
 */
@Deprecated
@Override
public String replaceMsWordCharacters(String text) {
	return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
}
548
549
557 @Override
558 public String replaceNewLine(String html) {
559 if (html == null) {
560 return null;
561 }
562
563 html = StringUtil.replace(html, StringPool.RETURN_NEW_LINE, "<br />");
564
565 return StringUtil.replace(html, StringPool.NEW_LINE, "<br />");
566 }
567
568
584 @Override
585 public String stripBetween(String text, String tag) {
586 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
587 }
588
589
/**
 * Strips HTML comments from the text by delegating to
 * <code>StringUtil.stripBetween</code> with the comment opening and
 * closing markers.
 *
 * @param  text the text to strip
 * @return the result of <code>StringUtil.stripBetween</code>
 */
@Override
public String stripComments(String text) {
	return StringUtil.stripBetween(text, "<!--", "-->");
}
600
601 @Override
602 public String stripHtml(String text) {
603 if (text == null) {
604 return null;
605 }
606
607 text = stripComments(text);
608
609 StringBuilder sb = new StringBuilder(text.length());
610
611 int x = 0;
612 int y = text.indexOf("<");
613
614 while (y != -1) {
615 sb.append(text.substring(x, y));
616 sb.append(StringPool.SPACE);
617
618
619
620 if (isTag(_TAG_SCRIPT, text, y + 1)) {
621 y = stripTag(_TAG_SCRIPT, text, y);
622 }
623 else if (isTag(_TAG_STYLE, text, y + 1)) {
624 y = stripTag(_TAG_STYLE, text, y);
625 }
626
627 x = text.indexOf(">", y);
628
629 if (x == -1) {
630 break;
631 }
632
633 x++;
634
635 if (x < y) {
636
637
638
639 break;
640 }
641
642 y = text.indexOf("<", x);
643 }
644
645 if (y == -1) {
646 sb.append(text.substring(x));
647 }
648
649 return sb.toString();
650 }
651
652
664 @Override
665 public String toInputSafe(String text) {
666 return StringUtil.replace(
667 text,
668 new String[] {"&", "\""},
669 new String[] {"&", """});
670 }
671
/**
 * Converts character references back to characters by delegating to
 * <code>StringUtil.replace(text, "&amp;", ";", _unescapeMap)</code>.
 *
 * <p>
 * NOTE(review): presumably every sequence that starts with an ampersand,
 * ends with a semicolon and whose content is a key of
 * <code>_unescapeMap</code> is replaced by the mapped value, while unknown
 * references are left alone; confirm against <code>StringUtil</code>.
 * </p>
 *
 * @param  text the text to unescape
 * @return the result of the replacement
 */
@Override
public String unescape(String text) {
	return StringUtil.replace(text, "&", ";", _unescapeMap);
}
676
677 @Override
678 public String unescapeCDATA(String text) {
679 if (text == null) {
680 return null;
681 }
682
683 if (text.length() == 0) {
684 return StringPool.BLANK;
685 }
686
687 text = StringUtil.replace(text, "<![CDATA[", "<![CDATA[");
688 text = StringUtil.replace(text, "]]>", "]]>");
689
690 return text;
691 }
692
693 @Override
694 public String wordBreak(String text, int columns) {
695 StringBundler sb = new StringBundler();
696
697 int length = 0;
698 int lastWrite = 0;
699 int pos = 0;
700
701 Matcher matcher = _pattern.matcher(text);
702
703 while (matcher.find()) {
704 if (matcher.start() < pos) {
705 continue;
706 }
707
708 while ((length + matcher.start() - pos) >= columns) {
709 pos += columns - length;
710
711 sb.append(text.substring(lastWrite, pos));
712 sb.append("<wbr/>­");
713
714 length = 0;
715 lastWrite = pos;
716 }
717
718 length += matcher.start() - pos;
719
720 String group = matcher.group();
721
722 if (group.equals(StringPool.AMPERSAND)) {
723 int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
724
725 if (x != -1) {
726 length++;
727 pos = x + 1;
728 }
729
730 continue;
731 }
732
733 if (group.equals(StringPool.LESS_THAN)) {
734 int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
735
736 if (x != -1) {
737 pos = x + 1;
738 }
739
740 continue;
741 }
742
743 if (group.equals(StringPool.SPACE) ||
744 group.equals(StringPool.NEW_LINE)) {
745
746 length = 0;
747 pos = matcher.start() + 1;
748 }
749 }
750
751 sb.append(text.substring(lastWrite));
752
753 return sb.toString();
754 }
755
756 protected boolean isTag(char[] tag, String text, int pos) {
757 if ((pos + tag.length + 1) <= text.length()) {
758 char item = '\0';
759
760 for (int i = 0; i < tag.length; i++) {
761 item = text.charAt(pos++);
762
763 if (Character.toLowerCase(item) != tag[i]) {
764 return false;
765 }
766 }
767
768 item = text.charAt(pos);
769
770
771
772 return !Character.isLetter(item);
773 }
774 else {
775 return false;
776 }
777 }
778
779 protected int stripTag(char[] tag, String text, int pos) {
780 int x = pos + _TAG_SCRIPT.length;
781
782
783
784 x = text.indexOf(">", x);
785
786 if (x < 0) {
787 return pos;
788 }
789
790
791
792 if (text.charAt(x-1) == '/') {
793 return pos;
794 }
795
796
797
798 while (true) {
799 x = text.indexOf("</", x);
800
801 if (x >= 0) {
802 if (isTag(tag, text, x + 2)) {
803 pos = x;
804
805 break;
806 }
807 else {
808
809
810
811 x += 2;
812 }
813 }
814 else {
815 break;
816 }
817 }
818
819 return pos;
820 }
821
822 private static final String[] _MS_WORD_HTML = new String[] {
823 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
824 };
825
826 private static final String[] _MS_WORD_UNICODE = new String[] {
827 "\u00ae", "\u2019", "\u201c", "\u201d"
828 };
829
// Lower case tag names whose element content is dropped by stripHtml

private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};

private static final char[] _TAG_STYLE = {'s', 't', 'y', 'l', 'e'};
833
834
835
// Characters that escapeXPath replaces with an underscore. The numeric
// entries are character codes: 9 (tab), 10 (line feed), 13 (carriage
// return), 133 (next line) and 8232 (line separator).
//
// NOTE(review): the double quote is listed twice while the apostrophe is
// not listed at all. One of the two entries was possibly meant to be the
// apostrophe; confirm before changing, since that would alter the output
// for existing callers.

private static final char[] _XPATH_TOKENS = {
	'(', ')', '[', ']', '.', '@', ',', ':', '/', '|', '+', '-', '=', '!',
	'<', '>', '*', '$', '"', '"', ' ', 9, 10, 13, 133, 8232
};
840
// Maps the name of a character reference, without the leading ampersand
// and the trailing semicolon, to the text it stands for. The map is filled
// once in the static initializer below and is passed to StringUtil.replace
// by unescape.

private static final Map<String, String> _unescapeMap = new HashMap<>();

static {

	// Named references

	_unescapeMap.put("lt", "<");
	_unescapeMap.put("gt", ">");
	_unescapeMap.put("amp", "&");
	_unescapeMap.put("rsquo", "\u2019");

	// Three digit decimal references

	_unescapeMap.put("#034", "\"");
	_unescapeMap.put("#039", "'");
	_unescapeMap.put("#040", "(");
	_unescapeMap.put("#041", ")");
	_unescapeMap.put("#044", ",");
	_unescapeMap.put("#035", "#");
	_unescapeMap.put("#037", "%");
	_unescapeMap.put("#059", ";");
	_unescapeMap.put("#061", "=");
	_unescapeMap.put("#043", "+");
	_unescapeMap.put("#045", "-");
}
860
861 private final Pattern _pattern = Pattern.compile("([\\s<&]|$)");
862
863 }