001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.security.pacl.DoPrivileged;
018 import com.liferay.portal.kernel.util.CharPool;
019 import com.liferay.portal.kernel.util.Html;
020 import com.liferay.portal.kernel.util.HttpUtil;
021 import com.liferay.portal.kernel.util.StringBundler;
022 import com.liferay.portal.kernel.util.StringPool;
023 import com.liferay.portal.kernel.util.StringUtil;
024 import com.liferay.portal.kernel.util.Validator;
025
026 import java.util.HashMap;
027 import java.util.Map;
028 import java.util.regex.Matcher;
029 import java.util.regex.Pattern;
030
031 import net.htmlparser.jericho.Renderer;
032 import net.htmlparser.jericho.Source;
033 import net.htmlparser.jericho.TextExtractor;
034
035
049 @DoPrivileged
050 public class HtmlImpl implements Html {
051
/**
 * Mode for {@link #escape(String, int)}: every escaped character is written
 * as {@code &#x}, its hexadecimal code and a trailing semicolon.
 */
052 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
053

/**
 * Mode for {@link #escape(String, int)}: every escaped character is written
 * as a backslash followed by its hexadecimal code.
 */
054 public static final int ESCAPE_MODE_CSS = 2;
055

/**
 * Mode for {@link #escape(String, int)}: every escaped character is written
 * as {@code \x} followed by its hexadecimal code.
 */
056 public static final int ESCAPE_MODE_JS = 3;
057

/**
 * Mode for {@link #escape(String, int)}: not matched by any dedicated branch
 * there, so the call is forwarded to {@link #escape(String)}.
 */
058 public static final int ESCAPE_MODE_TEXT = 4;
059

/**
 * Mode for {@link #escape(String, int)}: the text is handed to
 * <code>HttpUtil.encodeURL(text, true)</code>.
 */
060 public static final int ESCAPE_MODE_URL = 5;
061
062
072 @Override
073 public String buildData(Map<String, Object> data) {
074 if ((data == null) || data.isEmpty()) {
075 return StringPool.BLANK;
076 }
077
078 StringBundler sb = new StringBundler(data.size() * 5);
079
080 for (Map.Entry<String, Object> entry : data.entrySet()) {
081 sb.append("data-");
082 sb.append(entry.getKey());
083 sb.append("=\"");
084 sb.append(escapeAttribute(String.valueOf(entry.getValue())));
085 sb.append("\" ");
086 }
087
088 return sb.toString();
089 }
090
091
098 @Override
099 public String escape(String text) {
100 if (text == null) {
101 return null;
102 }
103
104 if (text.length() == 0) {
105 return StringPool.BLANK;
106 }
107
108
109
110
111
112 return StringUtil.replace(
113 text,
114 new char[] {
115 '<', '>', '&', '"', '\'', '\u00bb', '\u2013', '\u2014', '\u2028'
116 },
117 new String[] {
118 "<", ">", "&", """, "'", "»",
119 "–", "—", "
"
120 });
121 }
122
123
139 @Override
140 public String escape(String text, int mode) {
141 if (text == null) {
142 return null;
143 }
144
145 if (text.length() == 0) {
146 return StringPool.BLANK;
147 }
148
149 String prefix = StringPool.BLANK;
150 String postfix = StringPool.BLANK;
151
152 if (mode == ESCAPE_MODE_ATTRIBUTE) {
153 prefix = "&#x";
154 postfix = StringPool.SEMICOLON;
155 }
156 else if (mode == ESCAPE_MODE_CSS) {
157 prefix = StringPool.BACK_SLASH;
158 }
159 else if (mode == ESCAPE_MODE_JS) {
160 prefix = "\\x";
161 }
162 else if (mode == ESCAPE_MODE_URL) {
163 return HttpUtil.encodeURL(text, true);
164 }
165 else {
166 return escape(text);
167 }
168
169 StringBuilder sb = new StringBuilder(text.length());
170
171 for (int i = 0; i < text.length(); i++) {
172 char c = text.charAt(i);
173
174 if ((c > 255) || (c == CharPool.DASH) ||
175 (c == CharPool.UNDERLINE) || Character.isLetterOrDigit(c)) {
176
177 sb.append(c);
178 }
179 else {
180 sb.append(prefix);
181
182 String hexString = StringUtil.toHexString(c);
183
184 if (hexString.length() == 1) {
185 sb.append(StringPool.ASCII_TABLE[48]);
186 }
187
188 sb.append(hexString);
189 sb.append(postfix);
190 }
191 }
192
193 if (sb.length() == text.length()) {
194 return text;
195 }
196
197 return sb.toString();
198 }
199
200
208 @Override
209 public String escapeAttribute(String attribute) {
210 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
211 }
212
213
220 @Override
221 public String escapeCSS(String css) {
222 return escape(css, ESCAPE_MODE_CSS);
223 }
224
225
233 @Override
234 public String escapeHREF(String href) {
235 if (href == null) {
236 return null;
237 }
238
239 if (href.length() == 0) {
240 return StringPool.BLANK;
241 }
242
243 int index = href.indexOf(StringPool.COLON);
244
245 if (index == 4) {
246 String protocol = StringUtil.toLowerCase(href.substring(0, 4));
247
248 if (protocol.equals("data")) {
249 href = StringUtil.replaceFirst(href, CharPool.COLON, "%3a");
250 }
251 }
252 else if (index == 10) {
253 String protocol = StringUtil.toLowerCase(href.substring(0, 10));
254
255 if (protocol.equals("javascript")) {
256 href = StringUtil.replaceFirst(href, CharPool.COLON, "%3a");
257 }
258 }
259
260 return escapeAttribute(href);
261 }
262
263
271 @Override
272 public String escapeJS(String js) {
273 return escape(js, ESCAPE_MODE_JS);
274 }
275
276 @Override
277 public String escapeJSLink(String link) {
278 if (Validator.isNull(link)) {
279 return StringPool.BLANK;
280 }
281
282 if (link.indexOf(StringPool.COLON) == 10) {
283 String protocol = StringUtil.toLowerCase(link.substring(0, 10));
284
285 if (protocol.equals("javascript")) {
286 link = StringUtil.replaceFirst(link, CharPool.COLON, "%3a");
287 }
288 }
289
290 return link;
291 }
292
293
300 @Override
301 public String escapeURL(String url) {
302 return escape(url, ESCAPE_MODE_URL);
303 }
304
305 @Override
306 public String escapeXPath(String xPath) {
307 if (Validator.isNull(xPath)) {
308 return xPath;
309 }
310
311 StringBuilder sb = new StringBuilder(xPath.length());
312
313 for (int i = 0; i < xPath.length(); i++) {
314 char c = xPath.charAt(i);
315
316 boolean hasToken = false;
317
318 for (int j = 0; j < _XPATH_TOKENS.length; j++) {
319 if (c == _XPATH_TOKENS[j]) {
320 hasToken = true;
321
322 break;
323 }
324 }
325
326 if (hasToken) {
327 sb.append(StringPool.UNDERLINE);
328 }
329 else {
330 sb.append(c);
331 }
332 }
333
334 return sb.toString();
335 }
336
337 @Override
338 public String escapeXPathAttribute(String xPathAttribute) {
339 boolean hasApostrophe = xPathAttribute.contains(StringPool.APOSTROPHE);
340 boolean hasQuote = xPathAttribute.contains(StringPool.QUOTE);
341
342 if (hasQuote && hasApostrophe) {
343 String[] parts = xPathAttribute.split(StringPool.APOSTROPHE);
344
345 return "concat('".concat(
346 StringUtil.merge(parts, "', \"'\", '")).concat("')");
347 }
348
349 if (hasQuote) {
350 return StringPool.APOSTROPHE.concat(xPathAttribute).concat(
351 StringPool.APOSTROPHE);
352 }
353
354 return StringPool.QUOTE.concat(xPathAttribute).concat(StringPool.QUOTE);
355 }
356
357
370 @Override
371 public String extractText(String html) {
372 if (html == null) {
373 return null;
374 }
375
376 Source source = new Source(html);
377
378 TextExtractor textExtractor = source.getTextExtractor();
379
380 return textExtractor.toString();
381 }
382
383 @Override
384 public String fromInputSafe(String text) {
385 return StringUtil.replace(text, "&", "&");
386 }
387
388 @Override
389 public String getAUICompatibleId(String text) {
390 if (Validator.isNull(text)) {
391 return text;
392 }
393
394 StringBundler sb = null;
395
396 int lastReplacementIndex = 0;
397
398 for (int i = 0; i < text.length(); i++) {
399 char c = text.charAt(i);
400
401 if (((c <= 127) && (Validator.isChar(c) || Validator.isDigit(c))) ||
402 ((c > 127) && (c != CharPool.FIGURE_SPACE) &&
403 (c != CharPool.NARROW_NO_BREAK_SPACE) &&
404 (c != CharPool.NO_BREAK_SPACE))) {
405
406 continue;
407 }
408
409 if (sb == null) {
410 sb = new StringBundler();
411 }
412
413 if (i > lastReplacementIndex) {
414 sb.append(text.substring(lastReplacementIndex, i));
415 }
416
417 sb.append(CharPool.UNDERLINE);
418
419 if (c != CharPool.UNDERLINE) {
420 sb.append(StringUtil.toHexString(c));
421 }
422
423 sb.append(CharPool.UNDERLINE);
424
425 lastReplacementIndex = i + 1;
426 }
427
428 if (sb == null) {
429 return text;
430 }
431
432 if (lastReplacementIndex < text.length()) {
433 sb.append(text.substring(lastReplacementIndex));
434 }
435
436 return sb.toString();
437 }
438
439
455 @Override
456 public String render(String html) {
457 if (html == null) {
458 return null;
459 }
460
461 Source source = new Source(html);
462
463 Renderer renderer = source.getRenderer();
464
465 return renderer.toString();
466 }
467
468
477 @Deprecated
478 @Override
479 public String replaceMsWordCharacters(String text) {
480 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
481 }
482
483
491 @Override
492 public String replaceNewLine(String html) {
493 if (html == null) {
494 return null;
495 }
496
497 html = StringUtil.replace(html, StringPool.RETURN_NEW_LINE, "<br />");
498
499 return StringUtil.replace(html, CharPool.NEW_LINE, "<br />");
500 }
501
502
518 @Override
519 public String stripBetween(String text, String tag) {
520 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
521 }
522
523
530 @Override
531 public String stripComments(String text) {
532 return StringUtil.stripBetween(text, "<!--", "-->");
533 }
534
535 @Override
536 public String stripHtml(String text) {
537 if (text == null) {
538 return null;
539 }
540
541 text = stripComments(text);
542
543 StringBuilder sb = new StringBuilder(text.length());
544
545 int x = 0;
546 int y = text.indexOf("<");
547
548 while (y != -1) {
549 sb.append(text.substring(x, y));
550 sb.append(StringPool.SPACE);
551
552
553
554 if (isTag(_TAG_SCRIPT, text, y + 1)) {
555 y = stripTag(_TAG_SCRIPT, text, y);
556 }
557 else if (isTag(_TAG_STYLE, text, y + 1)) {
558 y = stripTag(_TAG_STYLE, text, y);
559 }
560
561 x = text.indexOf(">", y);
562
563 if (x == -1) {
564 break;
565 }
566
567 x++;
568
569 if (x < y) {
570
571
572
573 break;
574 }
575
576 y = text.indexOf("<", x);
577 }
578
579 if (y == -1) {
580 sb.append(text.substring(x));
581 }
582
583 return sb.toString();
584 }
585
586
598 @Override
599 public String toInputSafe(String text) {
600 return StringUtil.replace(
601 text, new char[] {'&', '\"'}, new String[] {"&", """});
602 }
603
604 @Override
605 public String unescape(String text) {
606 return StringUtil.replace(text, "&", ";", _unescapeMap);
607 }
608
609 @Override
610 public String unescapeCDATA(String text) {
611 if (text == null) {
612 return null;
613 }
614
615 if (text.length() == 0) {
616 return StringPool.BLANK;
617 }
618
619 text = StringUtil.replace(text, "<![CDATA[", "<![CDATA[");
620 text = StringUtil.replace(text, "]]>", "]]>");
621
622 return text;
623 }
624
625 @Override
626 public String wordBreak(String text, int columns) {
627 StringBundler sb = new StringBundler();
628
629 int length = 0;
630 int lastWrite = 0;
631 int pos = 0;
632
633 Matcher matcher = _pattern.matcher(text);
634
635 while (matcher.find()) {
636 if (matcher.start() < pos) {
637 continue;
638 }
639
640 while ((length + matcher.start() - pos) >= columns) {
641 pos += columns - length;
642
643 sb.append(text.substring(lastWrite, pos));
644 sb.append("<wbr/>­");
645
646 length = 0;
647 lastWrite = pos;
648 }
649
650 length += matcher.start() - pos;
651
652 String group = matcher.group();
653
654 if (group.equals(StringPool.AMPERSAND)) {
655 int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
656
657 if (x != -1) {
658 length++;
659 pos = x + 1;
660 }
661
662 continue;
663 }
664
665 if (group.equals(StringPool.LESS_THAN)) {
666 int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
667
668 if (x != -1) {
669 pos = x + 1;
670 }
671
672 continue;
673 }
674
675 if (group.equals(StringPool.SPACE) ||
676 group.equals(StringPool.NEW_LINE)) {
677
678 length = 0;
679 pos = matcher.start() + 1;
680 }
681 }
682
683 sb.append(text.substring(lastWrite));
684
685 return sb.toString();
686 }
687
688 protected boolean isTag(char[] tag, String text, int pos) {
689 if ((pos + tag.length + 1) <= text.length()) {
690 char item = '\0';
691
692 for (int i = 0; i < tag.length; i++) {
693 item = text.charAt(pos++);
694
695 if (Character.toLowerCase(item) != tag[i]) {
696 return false;
697 }
698 }
699
700 item = text.charAt(pos);
701
702
703
704 return !Character.isLetter(item);
705 }
706 else {
707 return false;
708 }
709 }
710
711 protected int stripTag(char[] tag, String text, int pos) {
712 int x = pos + _TAG_SCRIPT.length;
713
714
715
716 x = text.indexOf(">", x);
717
718 if (x < 0) {
719 return pos;
720 }
721
722
723
724 if (text.charAt(x-1) == '/') {
725 return pos;
726 }
727
728
729
730 while (true) {
731 x = text.indexOf("</", x);
732
733 if (x >= 0) {
734 if (isTag(tag, text, x + 2)) {
735 pos = x;
736
737 break;
738 }
739 else {
740
741
742
743 x += 2;
744 }
745 }
746 else {
747 break;
748 }
749 }
750
751 return pos;
752 }
753
/**
 * The replacements used by <code>replaceMsWordCharacters</code>, index
 * aligned with <code>_MS_WORD_UNICODE</code>. The registered sign is written
 * as an entity; using the raw character would make that replacement a
 * no-op.
 */
private static final String[] _MS_WORD_HTML = new String[] {
	"&reg;", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
};

/**
 * The characters replaced by <code>replaceMsWordCharacters</code>: the
 * registered sign, the right single quotation mark and the left and right
 * double quotation marks.
 */
private static final String[] _MS_WORD_UNICODE =
	new String[] {"\u00ae", "\u2019", "\u201c", "\u201d"};

private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};

private static final char[] _TAG_STYLE = {'s', 't', 'y', 'l', 'e'};

/**
 * The characters that <code>escapeXPath</code> replaces with an underscore.
 * The numeric entries are the code points of tab (9), line feed (10),
 * carriage return (13), next line (133) and line separator (8232).
 *
 * NOTE(review): the double quote is listed twice while the apostrophe is
 * missing; one of the two was possibly meant to be the apostrophe. Left
 * unchanged because adding it would alter what escapeXPath returns.
 */
private static final char[] _XPATH_TOKENS = {
	'(', ')', '[', ']', '.', '@', ',', ':', '/', '|', '+', '-', '=', '!',
	'<', '>', '*', '$', '"', '"', ' ', 9, 10, 13, 133, 8232
};

/**
 * The matcher source for <code>wordBreak</code>: a whitespace character,
 * <code>&lt;</code>, <code>&amp;</code> or the end of the input. A compiled
 * pattern is immutable and safe to share, so it is compiled once for the
 * class.
 */
private static final Pattern _pattern = Pattern.compile("([\\s<&]|$)");

/**
 * Maps the name found between <code>&amp;</code> and <code>;</code> to the
 * character it stands for; used by <code>unescape</code>.
 */
private static final Map<String, String> _unescapeMap = new HashMap<>();

static {
	_unescapeMap.put("lt", "<");
	_unescapeMap.put("gt", ">");
	_unescapeMap.put("amp", "&");
	_unescapeMap.put("rsquo", "\u2019");
	_unescapeMap.put("#034", "\"");
	_unescapeMap.put("#039", "'");
	_unescapeMap.put("#040", "(");
	_unescapeMap.put("#041", ")");
	_unescapeMap.put("#044", ",");
	_unescapeMap.put("#035", "#");
	_unescapeMap.put("#037", "%");
	_unescapeMap.put("#059", ";");
	_unescapeMap.put("#061", "=");
	_unescapeMap.put("#043", "+");
	_unescapeMap.put("#045", "-");
}
793
794 }