001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.security.pacl.DoPrivileged;
018 import com.liferay.portal.kernel.util.CharPool;
019 import com.liferay.portal.kernel.util.Html;
020 import com.liferay.portal.kernel.util.HttpUtil;
021 import com.liferay.portal.kernel.util.StringBundler;
022 import com.liferay.portal.kernel.util.StringPool;
023 import com.liferay.portal.kernel.util.StringUtil;
024 import com.liferay.portal.kernel.util.Validator;
025
026 import java.util.HashMap;
027 import java.util.Map;
028 import java.util.regex.Matcher;
029 import java.util.regex.Pattern;
030
031 import net.htmlparser.jericho.Renderer;
032 import net.htmlparser.jericho.Source;
033 import net.htmlparser.jericho.TextExtractor;
034
035
049 @DoPrivileged
050 public class HtmlImpl implements Html {
051
052 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
053
054 public static final int ESCAPE_MODE_CSS = 2;
055
056 public static final int ESCAPE_MODE_JS = 3;
057
058 public static final int ESCAPE_MODE_TEXT = 4;
059
060 public static final int ESCAPE_MODE_URL = 5;
061
062
072 @Override
073 public String buildData(Map<String, Object> data) {
074 if ((data == null) || data.isEmpty()) {
075 return StringPool.BLANK;
076 }
077
078 StringBundler sb = new StringBundler(data.size() * 5);
079
080 for (Map.Entry<String, Object> entry : data.entrySet()) {
081 sb.append("data-");
082 sb.append(entry.getKey());
083 sb.append("=\"");
084 sb.append(escapeAttribute(String.valueOf(entry.getValue())));
085 sb.append("\" ");
086 }
087
088 return sb.toString();
089 }
090
091
098 @Override
099 public String escape(String text) {
100 if (text == null) {
101 return null;
102 }
103
104 if (text.length() == 0) {
105 return StringPool.BLANK;
106 }
107
108
109
110
111
112 StringBundler sb = null;
113
114 int lastReplacementIndex = 0;
115
116 for (int i = 0; i < text.length(); i++) {
117 char c = text.charAt(i);
118
119 String replacement = null;
120
121 switch (c) {
122 case '<':
123 replacement = "<";
124
125 break;
126
127 case '>':
128 replacement = ">";
129
130 break;
131
132 case '&':
133 replacement = "&";
134
135 break;
136
137 case '"':
138 replacement = """;
139
140 break;
141
142 case '\'':
143 replacement = "'";
144
145 break;
146
147 case '\u00bb':
148 replacement = "»";
149
150 break;
151
152 case '\u2013':
153 replacement = "–";
154
155 break;
156
157 case '\u2014':
158 replacement = "—";
159
160 break;
161 }
162
163 if (replacement != null) {
164 if (sb == null) {
165 sb = new StringBundler();
166 }
167
168 if (i > lastReplacementIndex) {
169 sb.append(text.substring(lastReplacementIndex, i));
170 }
171
172 sb.append(replacement);
173
174 lastReplacementIndex = i + 1;
175 }
176 }
177
178 if (sb == null) {
179 return text;
180 }
181
182 if (lastReplacementIndex < text.length()) {
183 sb.append(text.substring(lastReplacementIndex));
184 }
185
186 return sb.toString();
187 }
188
189
205 @Override
206 public String escape(String text, int mode) {
207 if (text == null) {
208 return null;
209 }
210
211 if (text.length() == 0) {
212 return StringPool.BLANK;
213 }
214
215 String prefix = StringPool.BLANK;
216 String postfix = StringPool.BLANK;
217
218 if (mode == ESCAPE_MODE_ATTRIBUTE) {
219 prefix = "&#x";
220 postfix = StringPool.SEMICOLON;
221 }
222 else if (mode == ESCAPE_MODE_CSS) {
223 prefix = StringPool.BACK_SLASH;
224 }
225 else if (mode == ESCAPE_MODE_JS) {
226 prefix = "\\x";
227 }
228 else if (mode == ESCAPE_MODE_URL) {
229 return HttpUtil.encodeURL(text, true);
230 }
231 else {
232 return escape(text);
233 }
234
235 StringBuilder sb = new StringBuilder();
236
237 for (int i = 0; i < text.length(); i++) {
238 char c = text.charAt(i);
239
240 if ((c > 255) || Character.isLetterOrDigit(c) ||
241 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
242
243 sb.append(c);
244 }
245 else {
246 sb.append(prefix);
247
248 String hexString = StringUtil.toHexString(c);
249
250 if (hexString.length() == 1) {
251 sb.append(StringPool.ASCII_TABLE[48]);
252 }
253
254 sb.append(hexString);
255 sb.append(postfix);
256 }
257 }
258
259 if (sb.length() == text.length()) {
260 return text;
261 }
262 else {
263 return sb.toString();
264 }
265 }
266
267
275 @Override
276 public String escapeAttribute(String attribute) {
277 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
278 }
279
280
287 @Override
288 public String escapeCSS(String css) {
289 return escape(css, ESCAPE_MODE_CSS);
290 }
291
292
300 @Override
301 public String escapeHREF(String href) {
302 if (href == null) {
303 return null;
304 }
305
306 if (href.length() == 0) {
307 return StringPool.BLANK;
308 }
309
310 int index = href.indexOf(StringPool.COLON);
311
312 if (index == 4) {
313 String protocol = StringUtil.toLowerCase(href.substring(0, 4));
314
315 if (protocol.equals("data")) {
316 href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
317 }
318 }
319 else if (index == 10) {
320 String protocol = StringUtil.toLowerCase(href.substring(0, 10));
321
322 if (protocol.equals("javascript")) {
323 href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
324 }
325 }
326
327 return escapeAttribute(href);
328 }
329
330
338 @Override
339 public String escapeJS(String js) {
340 return escape(js, ESCAPE_MODE_JS);
341 }
342
343 @Override
344 public String escapeJSLink(String link) {
345 if (Validator.isNull(link)) {
346 return StringPool.BLANK;
347 }
348
349 if (link.indexOf(StringPool.COLON) == 10) {
350 String protocol = StringUtil.toLowerCase(link.substring(0, 10));
351
352 if (protocol.equals("javascript")) {
353 link = StringUtil.replaceFirst(link, StringPool.COLON, "%3a");
354 }
355 }
356
357 return link;
358 }
359
360
367 @Override
368 public String escapeURL(String url) {
369 return escape(url, ESCAPE_MODE_URL);
370 }
371
372 @Override
373 public String escapeXPath(String xPath) {
374 if (Validator.isNull(xPath)) {
375 return xPath;
376 }
377
378 StringBuilder sb = new StringBuilder(xPath.length());
379
380 for (int i = 0; i < xPath.length(); i++) {
381 char c = xPath.charAt(i);
382
383 boolean hasToken = false;
384
385 for (int j = 0; j < _XPATH_TOKENS.length; j++) {
386 if (c == _XPATH_TOKENS[j]) {
387 hasToken = true;
388
389 break;
390 }
391 }
392
393 if (hasToken) {
394 sb.append(StringPool.UNDERLINE);
395 }
396 else {
397 sb.append(c);
398 }
399 }
400
401 return sb.toString();
402 }
403
404 @Override
405 public String escapeXPathAttribute(String xPathAttribute) {
406 boolean hasApostrophe = xPathAttribute.contains(StringPool.APOSTROPHE);
407 boolean hasQuote = xPathAttribute.contains(StringPool.QUOTE);
408
409 if (hasQuote && hasApostrophe) {
410 String[] parts = xPathAttribute.split(StringPool.APOSTROPHE);
411
412 return "concat('".concat(
413 StringUtil.merge(parts, "', \"'\", '")).concat("')");
414 }
415
416 if (hasQuote) {
417 return StringPool.APOSTROPHE.concat(xPathAttribute).concat(
418 StringPool.APOSTROPHE);
419 }
420
421 return StringPool.QUOTE.concat(xPathAttribute).concat(StringPool.QUOTE);
422 }
423
424
437 @Override
438 public String extractText(String html) {
439 if (html == null) {
440 return null;
441 }
442
443 Source source = new Source(html);
444
445 TextExtractor textExtractor = source.getTextExtractor();
446
447 return textExtractor.toString();
448 }
449
450 @Override
451 public String fromInputSafe(String text) {
452 return StringUtil.replace(text, "&", "&");
453 }
454
455 @Override
456 public String getAUICompatibleId(String text) {
457 if (Validator.isNull(text)) {
458 return text;
459 }
460
461 StringBundler sb = null;
462
463 int lastReplacementIndex = 0;
464
465 for (int i = 0; i < text.length(); i++) {
466 char c = text.charAt(i);
467
468 if (((c <= 127) && (Validator.isChar(c) || Validator.isDigit(c))) ||
469 ((c > 127) && (c != CharPool.FIGURE_SPACE) &&
470 (c != CharPool.NARROW_NO_BREAK_SPACE) &&
471 (c != CharPool.NO_BREAK_SPACE))) {
472
473 continue;
474 }
475
476 if (sb == null) {
477 sb = new StringBundler();
478 }
479
480 if (i > lastReplacementIndex) {
481 sb.append(text.substring(lastReplacementIndex, i));
482 }
483
484 sb.append(CharPool.UNDERLINE);
485
486 if (c != CharPool.UNDERLINE) {
487 sb.append(StringUtil.toHexString(c));
488 }
489
490 sb.append(CharPool.UNDERLINE);
491
492 lastReplacementIndex = i + 1;
493 }
494
495 if (sb == null) {
496 return text;
497 }
498
499 if (lastReplacementIndex < text.length()) {
500 sb.append(text.substring(lastReplacementIndex));
501 }
502
503 return sb.toString();
504 }
505
506
522 @Override
523 public String render(String html) {
524 if (html == null) {
525 return null;
526 }
527
528 Source source = new Source(html);
529
530 Renderer renderer = source.getRenderer();
531
532 return renderer.toString();
533 }
534
535
544 @Deprecated
545 @Override
546 public String replaceMsWordCharacters(String text) {
547 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
548 }
549
550
558 @Override
559 public String replaceNewLine(String html) {
560 if (html == null) {
561 return null;
562 }
563
564 html = StringUtil.replace(html, StringPool.RETURN_NEW_LINE, "<br />");
565
566 return StringUtil.replace(html, StringPool.NEW_LINE, "<br />");
567 }
568
569
585 @Override
586 public String stripBetween(String text, String tag) {
587 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
588 }
589
590
597 @Override
598 public String stripComments(String text) {
599 return StringUtil.stripBetween(text, "<!--", "-->");
600 }
601
602 @Override
603 public String stripHtml(String text) {
604 if (text == null) {
605 return null;
606 }
607
608 text = stripComments(text);
609
610 StringBuilder sb = new StringBuilder(text.length());
611
612 int x = 0;
613 int y = text.indexOf("<");
614
615 while (y != -1) {
616 sb.append(text.substring(x, y));
617 sb.append(StringPool.SPACE);
618
619
620
621 if (isTag(_TAG_SCRIPT, text, y + 1)) {
622 y = stripTag(_TAG_SCRIPT, text, y);
623 }
624 else if (isTag(_TAG_STYLE, text, y + 1)) {
625 y = stripTag(_TAG_STYLE, text, y);
626 }
627
628 x = text.indexOf(">", y);
629
630 if (x == -1) {
631 break;
632 }
633
634 x++;
635
636 if (x < y) {
637
638
639
640 break;
641 }
642
643 y = text.indexOf("<", x);
644 }
645
646 if (y == -1) {
647 sb.append(text.substring(x));
648 }
649
650 return sb.toString();
651 }
652
653
665 @Override
666 public String toInputSafe(String text) {
667 return StringUtil.replace(
668 text,
669 new String[] {"&", "\""},
670 new String[] {"&", """});
671 }
672
673 @Override
674 public String unescape(String text) {
675 return StringUtil.replace(text, "&", ";", _unescapeMap);
676 }
677
678 @Override
679 public String unescapeCDATA(String text) {
680 if (text == null) {
681 return null;
682 }
683
684 if (text.length() == 0) {
685 return StringPool.BLANK;
686 }
687
688 text = StringUtil.replace(text, "<![CDATA[", "<![CDATA[");
689 text = StringUtil.replace(text, "]]>", "]]>");
690
691 return text;
692 }
693
694 @Override
695 public String wordBreak(String text, int columns) {
696 StringBundler sb = new StringBundler();
697
698 int length = 0;
699 int lastWrite = 0;
700 int pos = 0;
701
702 Matcher matcher = _pattern.matcher(text);
703
704 while (matcher.find()) {
705 if (matcher.start() < pos) {
706 continue;
707 }
708
709 while ((length + matcher.start() - pos) >= columns) {
710 pos += columns - length;
711
712 sb.append(text.substring(lastWrite, pos));
713 sb.append("<wbr/>­");
714
715 length = 0;
716 lastWrite = pos;
717 }
718
719 length += matcher.start() - pos;
720
721 String group = matcher.group();
722
723 if (group.equals(StringPool.AMPERSAND)) {
724 int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
725
726 if (x != -1) {
727 length++;
728 pos = x + 1;
729 }
730
731 continue;
732 }
733
734 if (group.equals(StringPool.LESS_THAN)) {
735 int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
736
737 if (x != -1) {
738 pos = x + 1;
739 }
740
741 continue;
742 }
743
744 if (group.equals(StringPool.SPACE) ||
745 group.equals(StringPool.NEW_LINE)) {
746
747 length = 0;
748 pos = matcher.start() + 1;
749 }
750 }
751
752 sb.append(text.substring(lastWrite));
753
754 return sb.toString();
755 }
756
757 protected boolean isTag(char[] tag, String text, int pos) {
758 if ((pos + tag.length + 1) <= text.length()) {
759 char item = '\0';
760
761 for (int i = 0; i < tag.length; i++) {
762 item = text.charAt(pos++);
763
764 if (Character.toLowerCase(item) != tag[i]) {
765 return false;
766 }
767 }
768
769 item = text.charAt(pos);
770
771
772
773 return !Character.isLetter(item);
774 }
775 else {
776 return false;
777 }
778 }
779
780 protected int stripTag(char[] tag, String text, int pos) {
781 int x = pos + _TAG_SCRIPT.length;
782
783
784
785 x = text.indexOf(">", x);
786
787 if (x < 0) {
788 return pos;
789 }
790
791
792
793 if (text.charAt(x-1) == '/') {
794 return pos;
795 }
796
797
798
799 while (true) {
800 x = text.indexOf("</", x);
801
802 if (x >= 0) {
803 if (isTag(tag, text, x + 2)) {
804 pos = x;
805
806 break;
807 }
808 else {
809
810
811
812 x += 2;
813 }
814 }
815 else {
816 break;
817 }
818 }
819
820 return pos;
821 }
822
823 private static final String[] _MS_WORD_HTML = new String[] {
824 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
825 };
826
827 private static final String[] _MS_WORD_UNICODE = new String[] {
828 "\u00ae", "\u2019", "\u201c", "\u201d"
829 };
830
831 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
832
833 private static final char[] _TAG_STYLE = {'s', 't', 'y', 'l', 'e'};
834
835
836
837 private static final char[] _XPATH_TOKENS = {
838 '(', ')', '[', ']', '.', '@', ',', ':', '/', '|', '+', '-', '=', '!',
839 '<', '>', '*', '$', '"', '"', ' ', 9, 10, 13, 133, 8232
840 };
841
/**
 * Lookup table used by <code>unescape</code>: the text found between
 * <code>&amp;</code> and <code>;</code> mapped to the character it stands
 * for.
 */
private static final Map<String, String> _unescapeMap = new HashMap<>();

static {

    // Each row is {reference name, replacement}

    String[][] references = {
        {"lt", "<"}, {"gt", ">"}, {"amp", "&"}, {"rsquo", "\u2019"},
        {"#034", "\""}, {"#039", "'"}, {"#040", "("}, {"#041", ")"},
        {"#044", ","}, {"#035", "#"}, {"#037", "%"}, {"#059", ";"},
        {"#061", "="}, {"#043", "+"}, {"#045", "-"}
    };

    for (String[] reference : references) {
        _unescapeMap.put(reference[0], reference[1]);
    }
}
861
862 private final Pattern _pattern = Pattern.compile("([\\s<&]|$)");
863
864 }