001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.security.pacl.DoPrivileged;
018 import com.liferay.portal.kernel.util.CharPool;
019 import com.liferay.portal.kernel.util.Html;
020 import com.liferay.portal.kernel.util.HttpUtil;
021 import com.liferay.portal.kernel.util.StringBundler;
022 import com.liferay.portal.kernel.util.StringPool;
023 import com.liferay.portal.kernel.util.StringUtil;
024 import com.liferay.portal.kernel.util.Validator;
025
026 import java.util.regex.Matcher;
027 import java.util.regex.Pattern;
028
029 import net.htmlparser.jericho.Renderer;
030 import net.htmlparser.jericho.Source;
031 import net.htmlparser.jericho.TextExtractor;
032
033
047 @DoPrivileged
048 public class HtmlImpl implements Html {
049
050 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
051
052 public static final int ESCAPE_MODE_CSS = 2;
053
054 public static final int ESCAPE_MODE_JS = 3;
055
056 public static final int ESCAPE_MODE_TEXT = 4;
057
058 public static final int ESCAPE_MODE_URL = 5;
059
060
067 @Override
068 public String escape(String text) {
069 if (text == null) {
070 return null;
071 }
072
073 if (text.length() == 0) {
074 return StringPool.BLANK;
075 }
076
077
078
079
080
081 StringBundler sb = null;
082
083 int lastReplacementIndex = 0;
084
085 for (int i = 0; i < text.length(); i++) {
086 char c = text.charAt(i);
087
088 String replacement = null;
089
090 switch (c) {
091 case '<':
092 replacement = "<";
093
094 break;
095
096 case '>':
097 replacement = ">";
098
099 break;
100
101 case '&':
102 replacement = "&";
103
104 break;
105
106 case '"':
107 replacement = """;
108
109 break;
110
111 case '\'':
112 replacement = "'";
113
114 break;
115
116 case '\u00bb':
117 replacement = "»";
118
119 break;
120
121 case '\u2013':
122 replacement = "–";
123
124 break;
125
126 case '\u2014':
127 replacement = "—";
128
129 break;
130 }
131
132 if (replacement != null) {
133 if (sb == null) {
134 sb = new StringBundler();
135 }
136
137 if (i > lastReplacementIndex) {
138 sb.append(text.substring(lastReplacementIndex, i));
139 }
140
141 sb.append(replacement);
142
143 lastReplacementIndex = i + 1;
144 }
145 }
146
147 if (sb == null) {
148 return text;
149 }
150
151 if (lastReplacementIndex < text.length()) {
152 sb.append(text.substring(lastReplacementIndex));
153 }
154
155 return sb.toString();
156 }
157
158
174 @Override
175 public String escape(String text, int mode) {
176 if (text == null) {
177 return null;
178 }
179
180 if (text.length() == 0) {
181 return StringPool.BLANK;
182 }
183
184 String prefix = StringPool.BLANK;
185 String postfix = StringPool.BLANK;
186
187 if (mode == ESCAPE_MODE_ATTRIBUTE) {
188 prefix = "&#x";
189 postfix = StringPool.SEMICOLON;
190 }
191 else if (mode == ESCAPE_MODE_CSS) {
192 prefix = StringPool.BACK_SLASH;
193 }
194 else if (mode == ESCAPE_MODE_JS) {
195 prefix = "\\x";
196 }
197 else if (mode == ESCAPE_MODE_URL) {
198 return HttpUtil.encodeURL(text, true);
199 }
200 else {
201 return escape(text);
202 }
203
204 StringBuilder sb = new StringBuilder();
205
206 for (int i = 0; i < text.length(); i++) {
207 char c = text.charAt(i);
208
209 if ((c > 255) || Character.isLetterOrDigit(c) ||
210 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
211
212 sb.append(c);
213 }
214 else {
215 sb.append(prefix);
216
217 String hexString = StringUtil.toHexString(c);
218
219 if (hexString.length() == 1) {
220 sb.append(StringPool.ASCII_TABLE[48]);
221 }
222
223 sb.append(hexString);
224 sb.append(postfix);
225 }
226 }
227
228 if (sb.length() == text.length()) {
229 return text;
230 }
231 else {
232 return sb.toString();
233 }
234 }
235
236
244 @Override
245 public String escapeAttribute(String attribute) {
246 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
247 }
248
249
256 @Override
257 public String escapeCSS(String css) {
258 return escape(css, ESCAPE_MODE_CSS);
259 }
260
261
269 @Override
270 public String escapeHREF(String href) {
271 if (href == null) {
272 return null;
273 }
274
275 if (href.length() == 0) {
276 return StringPool.BLANK;
277 }
278
279 if (href.indexOf(StringPool.COLON) == 10) {
280 String protocol = StringUtil.toLowerCase(href.substring(0, 10));
281
282 if (protocol.equals("javascript")) {
283 href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
284 }
285 }
286
287 return escapeAttribute(href);
288 }
289
290
298 @Override
299 public String escapeJS(String js) {
300 return escape(js, ESCAPE_MODE_JS);
301 }
302
303
310 @Override
311 public String escapeURL(String url) {
312 return escape(url, ESCAPE_MODE_URL);
313 }
314
315 @Override
316 public String escapeXPath(String xPath) {
317 if (Validator.isNull(xPath)) {
318 return xPath;
319 }
320
321 StringBuilder sb = new StringBuilder(xPath.length());
322
323 for (int i = 0; i < xPath.length(); i++) {
324 char c = xPath.charAt(i);
325
326 boolean hasToken = false;
327
328 for (int j = 0; j < _XPATH_TOKENS.length; j++) {
329 if (c == _XPATH_TOKENS[j]) {
330 hasToken = true;
331
332 break;
333 }
334 }
335
336 if (hasToken) {
337 sb.append(StringPool.UNDERLINE);
338 }
339 else {
340 sb.append(c);
341 }
342 }
343
344 return sb.toString();
345 }
346
347 @Override
348 public String escapeXPathAttribute(String xPathAttribute) {
349 boolean hasApostrophe = xPathAttribute.contains(StringPool.APOSTROPHE);
350 boolean hasQuote = xPathAttribute.contains(StringPool.QUOTE);
351
352 if (hasQuote && hasApostrophe) {
353 String[] parts = xPathAttribute.split(StringPool.APOSTROPHE);
354
355 return "concat('".concat(
356 StringUtil.merge(parts, "', \"'\", '")).concat("')");
357 }
358
359 if (hasQuote) {
360 return StringPool.APOSTROPHE.concat(xPathAttribute).concat(
361 StringPool.APOSTROPHE);
362 }
363
364 return StringPool.QUOTE.concat(xPathAttribute).concat(StringPool.QUOTE);
365 }
366
367
380 @Override
381 public String extractText(String html) {
382 if (html == null) {
383 return null;
384 }
385
386 Source source = new Source(html);
387
388 TextExtractor textExtractor = source.getTextExtractor();
389
390 return textExtractor.toString();
391 }
392
393 @Override
394 public String fromInputSafe(String text) {
395 return StringUtil.replace(text, "&", "&");
396 }
397
398 @Override
399 public String getAUICompatibleId(String text) {
400 if (Validator.isNull(text)) {
401 return text;
402 }
403
404 StringBundler sb = null;
405
406 int lastReplacementIndex = 0;
407
408 for (int i = 0; i < text.length(); i++) {
409 char c = text.charAt(i);
410
411 if (((c <= 127) && (Validator.isChar(c) || Validator.isDigit(c))) ||
412 ((c > 127) && (c != CharPool.FIGURE_SPACE) &&
413 (c != CharPool.NARROW_NO_BREAK_SPACE) &&
414 (c != CharPool.NO_BREAK_SPACE))) {
415
416 continue;
417 }
418
419 if (sb == null) {
420 sb = new StringBundler();
421 }
422
423 if (i > lastReplacementIndex) {
424 sb.append(text.substring(lastReplacementIndex, i));
425 }
426
427 sb.append(CharPool.UNDERLINE);
428
429 if (c != CharPool.UNDERLINE) {
430 sb.append(StringUtil.toHexString(c));
431 }
432
433 sb.append(CharPool.UNDERLINE);
434
435 lastReplacementIndex = i + 1;
436 }
437
438 if (sb == null) {
439 return text;
440 }
441
442 if (lastReplacementIndex < text.length()) {
443 sb.append(text.substring(lastReplacementIndex));
444 }
445
446 return sb.toString();
447 }
448
449
465 @Override
466 public String render(String html) {
467 if (html == null) {
468 return null;
469 }
470
471 Source source = new Source(html);
472
473 Renderer renderer = source.getRenderer();
474
475 return renderer.toString();
476 }
477
478
487 @Deprecated
488 @Override
489 public String replaceMsWordCharacters(String text) {
490 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
491 }
492
493
501 @Override
502 public String replaceNewLine(String html) {
503 if (html == null) {
504 return null;
505 }
506
507 html = StringUtil.replace(html, StringPool.RETURN_NEW_LINE, "<br />");
508
509 return StringUtil.replace(html, StringPool.NEW_LINE, "<br />");
510 }
511
512
528 @Override
529 public String stripBetween(String text, String tag) {
530 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
531 }
532
533
540 @Override
541 public String stripComments(String text) {
542 return StringUtil.stripBetween(text, "<!--", "-->");
543 }
544
545 @Override
546 public String stripHtml(String text) {
547 if (text == null) {
548 return null;
549 }
550
551 text = stripComments(text);
552
553 StringBuilder sb = new StringBuilder(text.length());
554
555 int x = 0;
556 int y = text.indexOf("<");
557
558 while (y != -1) {
559 sb.append(text.substring(x, y));
560 sb.append(StringPool.SPACE);
561
562
563
564 if (isTag(_TAG_SCRIPT, text, y + 1)) {
565 y = stripTag(_TAG_SCRIPT, text, y);
566 }
567 else if (isTag(_TAG_STYLE, text, y + 1)) {
568 y = stripTag(_TAG_STYLE, text, y);
569 }
570
571 x = text.indexOf(">", y);
572
573 if (x == -1) {
574 break;
575 }
576
577 x++;
578
579 if (x < y) {
580
581
582
583 break;
584 }
585
586 y = text.indexOf("<", x);
587 }
588
589 if (y == -1) {
590 sb.append(text.substring(x));
591 }
592
593 return sb.toString();
594 }
595
596
608 @Override
609 public String toInputSafe(String text) {
610 return StringUtil.replace(
611 text,
612 new String[] {"&", "\""},
613 new String[] {"&", """});
614 }
615
616 @Override
617 public String unescape(String text) {
618 if (text == null) {
619 return null;
620 }
621
622 if (text.length() == 0) {
623 return StringPool.BLANK;
624 }
625
626
627
628 text = StringUtil.replace(text, "<", "<");
629 text = StringUtil.replace(text, ">", ">");
630 text = StringUtil.replace(text, "&", "&");
631 text = StringUtil.replace(text, """, "\"");
632 text = StringUtil.replace(text, "'", "'");
633 text = StringUtil.replace(text, "(", "(");
634 text = StringUtil.replace(text, ")", ")");
635 text = StringUtil.replace(text, ",", ",");
636 text = StringUtil.replace(text, "#", "#");
637 text = StringUtil.replace(text, "%", "%");
638 text = StringUtil.replace(text, ";", ";");
639 text = StringUtil.replace(text, "=", "=");
640 text = StringUtil.replace(text, "+", "+");
641 text = StringUtil.replace(text, "-", "-");
642
643 return text;
644 }
645
646 @Override
647 public String unescapeCDATA(String text) {
648 if (text == null) {
649 return null;
650 }
651
652 if (text.length() == 0) {
653 return StringPool.BLANK;
654 }
655
656 text = StringUtil.replace(text, "<![CDATA[", "<![CDATA[");
657 text = StringUtil.replace(text, "]]>", "]]>");
658
659 return text;
660 }
661
662 @Override
663 public String wordBreak(String text, int columns) {
664 StringBundler sb = new StringBundler();
665
666 int length = 0;
667 int lastWrite = 0;
668 int pos = 0;
669
670 Matcher matcher = _pattern.matcher(text);
671
672 while (matcher.find()) {
673 if (matcher.start() < pos) {
674 continue;
675 }
676
677 while ((length + matcher.start() - pos) >= columns) {
678 pos += columns - length;
679
680 sb.append(text.substring(lastWrite, pos));
681 sb.append("<wbr/>­");
682
683 length = 0;
684 lastWrite = pos;
685 }
686
687 length += matcher.start() - pos;
688
689 String group = matcher.group();
690
691 if (group.equals(StringPool.AMPERSAND)) {
692 int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
693
694 if (x != -1) {
695 length++;
696 pos = x + 1;
697 }
698
699 continue;
700 }
701
702 if (group.equals(StringPool.LESS_THAN)) {
703 int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
704
705 if (x != -1) {
706 pos = x + 1;
707 }
708
709 continue;
710 }
711
712 if (group.equals(StringPool.SPACE) ||
713 group.equals(StringPool.NEW_LINE)) {
714
715 length = 0;
716 pos = matcher.start() + 1;
717 }
718 }
719
720 sb.append(text.substring(lastWrite));
721
722 return sb.toString();
723 }
724
725 protected boolean isTag(char[] tag, String text, int pos) {
726 if ((pos + tag.length + 1) <= text.length()) {
727 char item;
728
729 for (int i = 0; i < tag.length; i++) {
730 item = text.charAt(pos++);
731
732 if (Character.toLowerCase(item) != tag[i]) {
733 return false;
734 }
735 }
736
737 item = text.charAt(pos);
738
739
740
741 return !Character.isLetter(item);
742 }
743 else {
744 return false;
745 }
746 }
747
748 protected int stripTag(char[] tag, String text, int pos) {
749 int x = pos + _TAG_SCRIPT.length;
750
751
752
753 x = text.indexOf(">", x);
754
755 if (x < 0) {
756 return pos;
757 }
758
759
760
761 if (text.charAt(x-1) == '/') {
762 return pos;
763 }
764
765
766
767 while (true) {
768 x = text.indexOf("</", x);
769
770 if (x >= 0) {
771 if (isTag(tag, text, x + 2)) {
772 pos = x;
773
774 break;
775 }
776 else {
777
778
779
780 x += 2;
781 }
782 }
783 else {
784 break;
785 }
786 }
787
788 return pos;
789 }
790
791 private static final String[] _MS_WORD_HTML = new String[] {
792 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
793 };
794
795 private static final String[] _MS_WORD_UNICODE = new String[] {
796 "\u00ae", "\u2019", "\u201c", "\u201d"
797 };
798
799 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
800
801 private static final char[] _TAG_STYLE = {'s', 't', 'y', 'l', 'e'};
802
803
804
805 private static final char[] _XPATH_TOKENS = {
806 '(', ')', '[', ']', '.', '@', ',', ':', '/', '|', '+', '-', '=', '!',
807 '<', '>', '*', '$', '"', '"', ' ', 9, 10, 13, 133, 8232};
808
809 private Pattern _pattern = Pattern.compile("([\\s<&]|$)");
810
811 }