001
014
015 package com.liferay.portal.util;
016
017 import static java.lang.Character.MAX_SURROGATE;
018 import static java.lang.Character.MIN_SURROGATE;
019
020 import com.liferay.portal.kernel.security.pacl.DoPrivileged;
021 import com.liferay.portal.kernel.util.CharPool;
022 import com.liferay.portal.kernel.util.Html;
023 import com.liferay.portal.kernel.util.HttpUtil;
024 import com.liferay.portal.kernel.util.StringBundler;
025 import com.liferay.portal.kernel.util.StringPool;
026 import com.liferay.portal.kernel.util.StringUtil;
027 import com.liferay.portal.kernel.util.Validator;
028
029 import java.util.regex.Matcher;
030 import java.util.regex.Pattern;
031
032 import net.htmlparser.jericho.Renderer;
033 import net.htmlparser.jericho.Source;
034 import net.htmlparser.jericho.TextExtractor;
035
036
044 @DoPrivileged
045 public class HtmlImpl implements Html {
046
047 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
048
049 public static final int ESCAPE_MODE_CSS = 2;
050
051 public static final int ESCAPE_MODE_JS = 3;
052
053 public static final int ESCAPE_MODE_TEXT = 4;
054
055 public static final int ESCAPE_MODE_URL = 5;
056
057
064 @Override
065 public String escape(String text) {
066 if (text == null) {
067 return null;
068 }
069
070 if (text.length() == 0) {
071 return StringPool.BLANK;
072 }
073
074
075
076
077
078 StringBundler sb = null;
079
080 int lastReplacementIndex = 0;
081
082 for (int i = 0; i < text.length(); i++) {
083 char c = text.charAt(i);
084
085 String replacement = null;
086
087 if (c == '<') {
088 replacement = "<";
089 }
090 else if (c == '>') {
091 replacement = ">";
092 }
093 else if (c == '&') {
094 replacement = "&";
095 }
096 else if (c == '"') {
097 replacement = """;
098 }
099 else if (c == '\'') {
100 replacement = "'";
101 }
102 else if (c == '\u00bb') {
103 replacement = "»";
104 }
105 else if (c == '\u2013') {
106 replacement = "–";
107 }
108 else if (c == '\u2014') {
109 replacement = "—";
110 }
111 else if (c == '\u2028') {
112 replacement = "舲";
113 }
114 else if (!_isValidXmlCharacter(c) ||
115 _isUnicodeCompatibilityCharacter(c)) {
116
117 replacement = StringPool.SPACE;
118 }
119
120 if (replacement != null) {
121 if (sb == null) {
122 sb = new StringBundler();
123 }
124
125 if (i > lastReplacementIndex) {
126 sb.append(text.substring(lastReplacementIndex, i));
127 }
128
129 sb.append(replacement);
130
131 lastReplacementIndex = i + 1;
132 }
133 }
134
135 if (sb == null) {
136 return text;
137 }
138
139 if (lastReplacementIndex < text.length()) {
140 sb.append(text.substring(lastReplacementIndex));
141 }
142
143 return sb.toString();
144 }
145
146 @Override
147 public String escape(String text, int type) {
148 if (text == null) {
149 return null;
150 }
151
152 if (text.length() == 0) {
153 return StringPool.BLANK;
154 }
155
156 String prefix = StringPool.BLANK;
157 String postfix = StringPool.BLANK;
158
159 if (type == ESCAPE_MODE_ATTRIBUTE) {
160 prefix = "&#x";
161 postfix = StringPool.SEMICOLON;
162 }
163 else if (type == ESCAPE_MODE_CSS) {
164 prefix = StringPool.BACK_SLASH;
165 }
166 else if (type == ESCAPE_MODE_JS) {
167 prefix = "\\x";
168 }
169 else if (type == ESCAPE_MODE_URL) {
170 return HttpUtil.encodeURL(text, true);
171 }
172 else {
173 return escape(text);
174 }
175
176 StringBuilder sb = null;
177 char[] hexBuffer = new char[4];
178 int lastReplacementIndex = 0;
179
180 for (int i = 0; i < text.length(); i++) {
181 char c = text.charAt(i);
182
183 if (c < _VALID_CHARS.length) {
184 if (!_VALID_CHARS[c]) {
185 if (sb == null) {
186 sb = new StringBuilder(text.length() + 64);
187 }
188
189 if (i > lastReplacementIndex) {
190 sb.append(text, lastReplacementIndex, i);
191 }
192
193 sb.append(prefix);
194
195 _appendHexChars(sb, hexBuffer, c);
196
197 sb.append(postfix);
198
199 lastReplacementIndex = i + 1;
200 }
201 }
202 else if ((type == ESCAPE_MODE_ATTRIBUTE) &&
203 (!_isValidXmlCharacter(c) ||
204 _isUnicodeCompatibilityCharacter(c))) {
205
206 if (sb == null) {
207 sb = new StringBuilder(text.length() + 64);
208 }
209
210 if (i > lastReplacementIndex) {
211 sb.append(text, lastReplacementIndex, i);
212 }
213
214 sb.append(CharPool.SPACE);
215
216 lastReplacementIndex = i + 1;
217 }
218 }
219
220 if (sb == null) {
221 return text;
222 }
223
224 if (lastReplacementIndex < text.length()) {
225 sb.append(text, lastReplacementIndex, text.length());
226 }
227
228 return sb.toString();
229 }
230
231 @Override
232 public String escapeAttribute(String attribute) {
233 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
234 }
235
236 @Override
237 public String escapeCSS(String css) {
238 return escape(css, ESCAPE_MODE_CSS);
239 }
240
241 @Override
242 public String escapeHREF(String href) {
243 if (href == null) {
244 return null;
245 }
246
247 if (href.length() == 0) {
248 return StringPool.BLANK;
249 }
250
251 int index = href.indexOf(StringPool.COLON);
252
253 if (index == 4) {
254 String protocol = StringUtil.toLowerCase(href.substring(0, 4));
255
256 if (protocol.equals("data")) {
257 href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
258 }
259 }
260 else if (index == 10) {
261 String protocol = StringUtil.toLowerCase(href.substring(0, 10));
262
263 if (protocol.equals("javascript")) {
264 href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
265 }
266 }
267
268 return escapeAttribute(href);
269 }
270
271 @Override
272 public String escapeJS(String js) {
273 return escape(js, ESCAPE_MODE_JS);
274 }
275
276 @Override
277 public String escapeURL(String url) {
278 return escape(url, ESCAPE_MODE_URL);
279 }
280
281 @Override
282 public String escapeXPath(String xPath) {
283 if (Validator.isNull(xPath)) {
284 return xPath;
285 }
286
287 StringBuilder sb = new StringBuilder(xPath.length());
288
289 for (int i = 0; i < xPath.length(); i++) {
290 char c = xPath.charAt(i);
291
292 boolean hasToken = false;
293
294 for (int j = 0; j < _XPATH_TOKENS.length; j++) {
295 if (c == _XPATH_TOKENS[j]) {
296 hasToken = true;
297
298 break;
299 }
300 }
301
302 if (hasToken) {
303 sb.append(StringPool.UNDERLINE);
304 }
305 else {
306 sb.append(c);
307 }
308 }
309
310 return sb.toString();
311 }
312
313 @Override
314 public String escapeXPathAttribute(String xPathAttribute) {
315 boolean hasApostrophe = xPathAttribute.contains(StringPool.APOSTROPHE);
316 boolean hasQuote = xPathAttribute.contains(StringPool.QUOTE);
317
318 if (hasQuote && hasApostrophe) {
319 String[] parts = xPathAttribute.split(StringPool.APOSTROPHE);
320
321 return "concat('".concat(
322 StringUtil.merge(parts, "', \"'\", '")).concat("')");
323 }
324
325 if (hasQuote) {
326 return StringPool.APOSTROPHE.concat(xPathAttribute).concat(
327 StringPool.APOSTROPHE);
328 }
329
330 return StringPool.QUOTE.concat(xPathAttribute).concat(StringPool.QUOTE);
331 }
332
333 @Override
334 public String extractText(String html) {
335 if (html == null) {
336 return null;
337 }
338
339 Source source = new Source(html);
340
341 TextExtractor textExtractor = source.getTextExtractor();
342
343 return textExtractor.toString();
344 }
345
346 @Override
347 public String fromInputSafe(String text) {
348 return StringUtil.replace(text, "&", "&");
349 }
350
351 @Override
352 public String getAUICompatibleId(String text) {
353 if (Validator.isNull(text)) {
354 return text;
355 }
356
357 StringBundler sb = null;
358
359 int lastReplacementIndex = 0;
360
361 for (int i = 0; i < text.length(); i++) {
362 char c = text.charAt(i);
363
364 if (((c <= 127) && (Validator.isChar(c) || Validator.isDigit(c))) ||
365 ((c > 127) && (c != CharPool.FIGURE_SPACE) &&
366 (c != CharPool.NARROW_NO_BREAK_SPACE) &&
367 (c != CharPool.NO_BREAK_SPACE))) {
368
369 continue;
370 }
371
372 if (sb == null) {
373 sb = new StringBundler();
374 }
375
376 if (i > lastReplacementIndex) {
377 sb.append(text.substring(lastReplacementIndex, i));
378 }
379
380 sb.append(StringPool.UNDERLINE);
381
382 if (c != CharPool.UNDERLINE) {
383 sb.append(StringUtil.toHexString(c));
384 }
385
386 sb.append(StringPool.UNDERLINE);
387
388 lastReplacementIndex = i + 1;
389 }
390
391 if (sb == null) {
392 return text;
393 }
394
395 if (lastReplacementIndex < text.length()) {
396 sb.append(text.substring(lastReplacementIndex));
397 }
398
399 return sb.toString();
400 }
401
402 @Deprecated
403 @Override
404 public String render(String html) {
405 if (html == null) {
406 return null;
407 }
408
409 Source source = new Source(html);
410
411 Renderer renderer = source.getRenderer();
412
413 return renderer.toString();
414 }
415
416 @Override
417 public String replaceMsWordCharacters(String text) {
418 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
419 }
420
421 @Override
422 public String replaceNewLine(String text) {
423 if (text == null) {
424 return null;
425 }
426
427 return text.replaceAll("\r?\n", "<br />");
428 }
429
430 @Override
431 public String stripBetween(String text, String tag) {
432 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
433 }
434
435 @Override
436 public String stripComments(String text) {
437 return StringUtil.stripBetween(text, "<!--", "-->");
438 }
439
440 @Override
441 public String stripHtml(String text) {
442 if (text == null) {
443 return null;
444 }
445
446 text = stripComments(text);
447
448 StringBuilder sb = new StringBuilder(text.length());
449
450 int x = 0;
451 int y = text.indexOf("<");
452
453 while (y != -1) {
454 sb.append(text.substring(x, y));
455 sb.append(StringPool.SPACE);
456
457
458
459 if (isTag(_TAG_SCRIPT, text, y + 1)) {
460 y = stripTag(_TAG_SCRIPT, text, y);
461 }
462 else if (isTag(_TAG_STYLE, text, y + 1)) {
463 y = stripTag(_TAG_STYLE, text, y);
464 }
465
466 x = text.indexOf(">", y);
467
468 if (x == -1) {
469 break;
470 }
471
472 x++;
473
474 if (x < y) {
475
476
477
478 break;
479 }
480
481 y = text.indexOf("<", x);
482 }
483
484 if (y == -1) {
485 sb.append(text.substring(x));
486 }
487
488 return sb.toString();
489 }
490
491 @Override
492 public String toInputSafe(String text) {
493 return StringUtil.replace(
494 text,
495 new String[] {"&", "\""},
496 new String[] {"&", """});
497 }
498
499 @Override
500 public String unescape(String text) {
501 if (text == null) {
502 return null;
503 }
504
505 if (text.length() == 0) {
506 return StringPool.BLANK;
507 }
508
509
510
511 text = StringUtil.replace(text, "<", "<");
512 text = StringUtil.replace(text, ">", ">");
513 text = StringUtil.replace(text, "&", "&");
514 text = StringUtil.replace(text, "’", "\u2019");
515 text = StringUtil.replace(text, """, "\"");
516 text = StringUtil.replace(text, "'", "'");
517 text = StringUtil.replace(text, "(", "(");
518 text = StringUtil.replace(text, ")", ")");
519 text = StringUtil.replace(text, ",", ",");
520 text = StringUtil.replace(text, "#", "#");
521 text = StringUtil.replace(text, "%", "%");
522 text = StringUtil.replace(text, ";", ";");
523 text = StringUtil.replace(text, "=", "=");
524 text = StringUtil.replace(text, "+", "+");
525 text = StringUtil.replace(text, "-", "-");
526
527 return text;
528 }
529
530 @Override
531 public String unescapeCDATA(String text) {
532 if (text == null) {
533 return null;
534 }
535
536 if (text.length() == 0) {
537 return StringPool.BLANK;
538 }
539
540 text = StringUtil.replace(text, "<![CDATA[", "<![CDATA[");
541 text = StringUtil.replace(text, "]]>", "]]>");
542
543 return text;
544 }
545
546 @Override
547 public String wordBreak(String text, int columns) {
548 StringBundler sb = new StringBundler();
549
550 int length = 0;
551 int lastWrite = 0;
552 int pos = 0;
553
554 Pattern pattern = Pattern.compile("([\\s<&]|$)");
555
556 Matcher matcher = pattern.matcher(text);
557
558 while (matcher.find()) {
559 if (matcher.start() < pos) {
560 continue;
561 }
562
563 while ((length + matcher.start() - pos) >= columns) {
564 pos += columns - length;
565
566 sb.append(text.substring(lastWrite, pos));
567 sb.append("<wbr/>­");
568
569 length = 0;
570 lastWrite = pos;
571 }
572
573 length += matcher.start() - pos;
574
575 String group = matcher.group();
576
577 if (group.equals(StringPool.AMPERSAND)) {
578 int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
579
580 if (x != -1) {
581 length++;
582 pos = x + 1;
583 }
584
585 continue;
586 }
587
588 if (group.equals(StringPool.LESS_THAN)) {
589 int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
590
591 if (x != -1) {
592 pos = x + 1;
593 }
594
595 continue;
596 }
597
598 if (group.equals(StringPool.SPACE) ||
599 group.equals(StringPool.NEW_LINE)) {
600
601 length = 0;
602 pos = matcher.start() + 1;
603 }
604 }
605
606 sb.append(text.substring(lastWrite));
607
608 return sb.toString();
609 }
610
611 protected boolean isTag(char[] tag, String text, int pos) {
612 if ((pos + tag.length + 1) <= text.length()) {
613 char item;
614
615 for (int i = 0; i < tag.length; i++) {
616 item = text.charAt(pos++);
617
618 if (Character.toLowerCase(item) != tag[i]) {
619 return false;
620 }
621 }
622
623 item = text.charAt(pos);
624
625
626
627 return !Character.isLetter(item);
628 }
629 else {
630 return false;
631 }
632 }
633
634 protected int stripTag(char[] tag, String text, int pos) {
635 int x = pos + _TAG_SCRIPT.length;
636
637
638
639 x = text.indexOf(">", x);
640
641 if (x < 0) {
642 return pos;
643 }
644
645
646
647 if (text.charAt(x-1) == '/') {
648 return pos;
649 }
650
651
652
653 while (true) {
654 x = text.indexOf("</", x);
655
656 if (x >= 0) {
657 if (isTag(tag, text, x + 2)) {
658 pos = x;
659
660 break;
661 }
662 else {
663
664
665
666 x += 2;
667 }
668 }
669 else {
670 break;
671 }
672 }
673
674 return pos;
675 }
676
677 private static void _appendHexChars(
678 StringBuilder sb, char[] buffer, char c) {
679
680 int index = buffer.length;
681
682 do {
683 buffer[--index] = _HEX_DIGITS[c & 15];
684
685 c >>>= 4;
686 }
687 while (c != 0);
688
689 if (index == (buffer.length - 1)) {
690 sb.append(CharPool.NUMBER_0);
691 sb.append(buffer[index]);
692
693 return;
694 }
695
696 sb.append(buffer, index, buffer.length - index);
697 }
698
699 private boolean _isUnicodeCompatibilityCharacter(char c) {
700 if (((c >= '\u007f') && (c <= '\u0084')) ||
701 ((c >= '\u0086') && (c <= '\u009f')) ||
702 ((c >= '\ufdd0') && (c <= '\ufdef'))) {
703
704 return true;
705 }
706
707 return false;
708 }
709
710 private boolean _isValidXmlCharacter(char c) {
711 if (((c >= CharPool.SPACE) && (c <= '\ud7ff')) ||
712 ((c >= '\ue000') && (c <= '\ufffd')) ||
713 ((c >= MIN_SURROGATE) && (c < (MAX_SURROGATE + 1))) ||
714 (c == CharPool.TAB) || (c == CharPool.NEW_LINE) ||
715 (c == CharPool.RETURN)) {
716
717 return true;
718 }
719
720 return false;
721 }
722
723 private static final char[] _HEX_DIGITS = {
724 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd',
725 'e', 'f'
726 };
727
728 private static final String[] _MS_WORD_HTML = new String[] {
729 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
730 };
731
732 private static final String[] _MS_WORD_UNICODE = new String[] {
733 "\u00ae", "\u2019", "\u201c", "\u201d"
734 };
735
736 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
737
738 private static final char[] _TAG_STYLE = {'s', 't', 'y', 'l', 'e'};
739
740 private static final boolean[] _VALID_CHARS = new boolean[128];
741
742
743
744 private static final char[] _XPATH_TOKENS = {
745 '(', ')', '[', ']', '.', '@', ',', ':', '/', '|', '+', '-', '=', '!',
746 '<', '>', '*', '$', '"', '"', ' ', 9, 10, 13, 133, 8232};
747
// Marks ASCII letters, digits, the hyphen, and the underscore as characters
// that never need escaping

static {
	char[][] ranges = {{'a', 'z'}, {'A', 'Z'}, {'0', '9'}};

	for (char[] range : ranges) {
		for (char c = range[0]; c <= range[1]; c++) {
			_VALID_CHARS[c] = true;
		}
	}

	for (char c : new char[] {'-', '_'}) {
		_VALID_CHARS[c] = true;
	}
}
764
765 }