001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.security.pacl.DoPrivileged;
018 import com.liferay.portal.kernel.util.CharPool;
019 import com.liferay.portal.kernel.util.Html;
020 import com.liferay.portal.kernel.util.HttpUtil;
021 import com.liferay.portal.kernel.util.StringBundler;
022 import com.liferay.portal.kernel.util.StringPool;
023 import com.liferay.portal.kernel.util.StringUtil;
024 import com.liferay.portal.kernel.util.Validator;
025
026 import java.util.regex.Matcher;
027 import java.util.regex.Pattern;
028
029 import net.htmlparser.jericho.Renderer;
030 import net.htmlparser.jericho.Source;
031 import net.htmlparser.jericho.TextExtractor;
032
033
041 @DoPrivileged
042 public class HtmlImpl implements Html {
043
044 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
045
046 public static final int ESCAPE_MODE_CSS = 2;
047
048 public static final int ESCAPE_MODE_JS = 3;
049
050 public static final int ESCAPE_MODE_TEXT = 4;
051
052 public static final int ESCAPE_MODE_URL = 5;
053
054
061 @Override
062 public String escape(String text) {
063 if (text == null) {
064 return null;
065 }
066
067 if (text.length() == 0) {
068 return StringPool.BLANK;
069 }
070
071
072
073
074
075 StringBundler sb = null;
076
077 int lastReplacementIndex = 0;
078
079 for (int i = 0; i < text.length(); i++) {
080 char c = text.charAt(i);
081
082 String replacement = null;
083
084 switch (c) {
085 case '<':
086 replacement = "<";
087
088 break;
089
090 case '>':
091 replacement = ">";
092
093 break;
094
095 case '&':
096 replacement = "&";
097
098 break;
099
100 case '"':
101 replacement = """;
102
103 break;
104
105 case '\'':
106 replacement = "'";
107
108 break;
109
110 case '\u00bb':
111 replacement = "»";
112
113 break;
114
115 case '\u2013':
116 replacement = "–";
117
118 break;
119
120 case '\u2014':
121 replacement = "—";
122
123 break;
124
125 case '\u2028':
126 replacement = "舲";
127
128 break;
129 }
130
131 if (replacement != null) {
132 if (sb == null) {
133 sb = new StringBundler();
134 }
135
136 if (i > lastReplacementIndex) {
137 sb.append(text.substring(lastReplacementIndex, i));
138 }
139
140 sb.append(replacement);
141
142 lastReplacementIndex = i + 1;
143 }
144 }
145
146 if (sb == null) {
147 return text;
148 }
149
150 if (lastReplacementIndex < text.length()) {
151 sb.append(text.substring(lastReplacementIndex));
152 }
153
154 return sb.toString();
155 }
156
157 @Override
158 public String escape(String text, int type) {
159 if (text == null) {
160 return null;
161 }
162
163 if (text.length() == 0) {
164 return StringPool.BLANK;
165 }
166
167 String prefix = StringPool.BLANK;
168 String postfix = StringPool.BLANK;
169
170 if (type == ESCAPE_MODE_ATTRIBUTE) {
171 prefix = "&#x";
172 postfix = StringPool.SEMICOLON;
173 }
174 else if (type == ESCAPE_MODE_CSS) {
175 prefix = StringPool.BACK_SLASH;
176 }
177 else if (type == ESCAPE_MODE_JS) {
178 prefix = "\\x";
179 }
180 else if (type == ESCAPE_MODE_URL) {
181 return HttpUtil.encodeURL(text, true);
182 }
183 else {
184 return escape(text);
185 }
186
187 StringBuilder sb = new StringBuilder();
188
189 for (int i = 0; i < text.length(); i++) {
190 char c = text.charAt(i);
191
192 if ((c > 255) || Character.isLetterOrDigit(c) ||
193 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
194
195 sb.append(c);
196 }
197 else {
198 sb.append(prefix);
199
200 String hexString = StringUtil.toHexString(c);
201
202 if (hexString.length() == 1) {
203 sb.append(StringPool.ASCII_TABLE[48]);
204 }
205
206 sb.append(hexString);
207 sb.append(postfix);
208 }
209 }
210
211 if (sb.length() == text.length()) {
212 return text;
213 }
214 else {
215 return sb.toString();
216 }
217 }
218
219 @Override
220 public String escapeAttribute(String attribute) {
221 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
222 }
223
224 @Override
225 public String escapeCSS(String css) {
226 return escape(css, ESCAPE_MODE_CSS);
227 }
228
229 @Override
230 public String escapeHREF(String href) {
231 if (href == null) {
232 return null;
233 }
234
235 if (href.length() == 0) {
236 return StringPool.BLANK;
237 }
238
239 int index = href.indexOf(StringPool.COLON);
240
241 if (index == 4) {
242 String protocol = StringUtil.toLowerCase(href.substring(0, 4));
243
244 if (protocol.equals("data")) {
245 href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
246 }
247 }
248 else if (index == 10) {
249 String protocol = StringUtil.toLowerCase(href.substring(0, 10));
250
251 if (protocol.equals("javascript")) {
252 href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
253 }
254 }
255
256 return escapeAttribute(href);
257 }
258
259 @Override
260 public String escapeJS(String js) {
261 return escape(js, ESCAPE_MODE_JS);
262 }
263
264 @Override
265 public String escapeURL(String url) {
266 return escape(url, ESCAPE_MODE_URL);
267 }
268
269 @Override
270 public String escapeXPath(String xPath) {
271 if (Validator.isNull(xPath)) {
272 return xPath;
273 }
274
275 StringBuilder sb = new StringBuilder(xPath.length());
276
277 for (int i = 0; i < xPath.length(); i++) {
278 char c = xPath.charAt(i);
279
280 boolean hasToken = false;
281
282 for (int j = 0; j < _XPATH_TOKENS.length; j++) {
283 if (c == _XPATH_TOKENS[j]) {
284 hasToken = true;
285
286 break;
287 }
288 }
289
290 if (hasToken) {
291 sb.append(StringPool.UNDERLINE);
292 }
293 else {
294 sb.append(c);
295 }
296 }
297
298 return sb.toString();
299 }
300
301 @Override
302 public String escapeXPathAttribute(String xPathAttribute) {
303 boolean hasApostrophe = xPathAttribute.contains(StringPool.APOSTROPHE);
304 boolean hasQuote = xPathAttribute.contains(StringPool.QUOTE);
305
306 if (hasQuote && hasApostrophe) {
307 String[] parts = xPathAttribute.split(StringPool.APOSTROPHE);
308
309 return "concat('".concat(
310 StringUtil.merge(parts, "', \"'\", '")).concat("')");
311 }
312
313 if (hasQuote) {
314 return StringPool.APOSTROPHE.concat(xPathAttribute).concat(
315 StringPool.APOSTROPHE);
316 }
317
318 return StringPool.QUOTE.concat(xPathAttribute).concat(StringPool.QUOTE);
319 }
320
321 @Override
322 public String extractText(String html) {
323 if (html == null) {
324 return null;
325 }
326
327 Source source = new Source(html);
328
329 TextExtractor textExtractor = source.getTextExtractor();
330
331 return textExtractor.toString();
332 }
333
334 @Override
335 public String fromInputSafe(String text) {
336 return StringUtil.replace(text, "&", "&");
337 }
338
339 @Override
340 public String getAUICompatibleId(String text) {
341 if (Validator.isNull(text)) {
342 return text;
343 }
344
345 StringBundler sb = null;
346
347 int lastReplacementIndex = 0;
348
349 for (int i = 0; i < text.length(); i++) {
350 char c = text.charAt(i);
351
352 if (((c <= 127) && (Validator.isChar(c) || Validator.isDigit(c))) ||
353 ((c > 127) && (c != CharPool.FIGURE_SPACE) &&
354 (c != CharPool.NARROW_NO_BREAK_SPACE) &&
355 (c != CharPool.NO_BREAK_SPACE))) {
356
357 continue;
358 }
359
360 if (sb == null) {
361 sb = new StringBundler();
362 }
363
364 if (i > lastReplacementIndex) {
365 sb.append(text.substring(lastReplacementIndex, i));
366 }
367
368 sb.append(CharPool.UNDERLINE);
369
370 if (c != CharPool.UNDERLINE) {
371 sb.append(StringUtil.toHexString(c));
372 }
373
374 sb.append(CharPool.UNDERLINE);
375
376 lastReplacementIndex = i + 1;
377 }
378
379 if (sb == null) {
380 return text;
381 }
382
383 if (lastReplacementIndex < text.length()) {
384 sb.append(text.substring(lastReplacementIndex));
385 }
386
387 return sb.toString();
388 }
389
390 @Deprecated
391 @Override
392 public String render(String html) {
393 if (html == null) {
394 return null;
395 }
396
397 Source source = new Source(html);
398
399 Renderer renderer = source.getRenderer();
400
401 return renderer.toString();
402 }
403
404 @Override
405 public String replaceMsWordCharacters(String text) {
406 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
407 }
408
409 @Override
410 public String replaceNewLine(String text) {
411 if (text == null) {
412 return null;
413 }
414
415 return text.replaceAll("\r?\n", "<br />");
416 }
417
418 @Override
419 public String stripBetween(String text, String tag) {
420 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
421 }
422
423 @Override
424 public String stripComments(String text) {
425 return StringUtil.stripBetween(text, "<!--", "-->");
426 }
427
428 @Override
429 public String stripHtml(String text) {
430 if (text == null) {
431 return null;
432 }
433
434 text = stripComments(text);
435
436 StringBuilder sb = new StringBuilder(text.length());
437
438 int x = 0;
439 int y = text.indexOf("<");
440
441 while (y != -1) {
442 sb.append(text.substring(x, y));
443 sb.append(StringPool.SPACE);
444
445
446
447 if (isTag(_TAG_SCRIPT, text, y + 1)) {
448 y = stripTag(_TAG_SCRIPT, text, y);
449 }
450 else if (isTag(_TAG_STYLE, text, y + 1)) {
451 y = stripTag(_TAG_STYLE, text, y);
452 }
453
454 x = text.indexOf(">", y);
455
456 if (x == -1) {
457 break;
458 }
459
460 x++;
461
462 if (x < y) {
463
464
465
466 break;
467 }
468
469 y = text.indexOf("<", x);
470 }
471
472 if (y == -1) {
473 sb.append(text.substring(x));
474 }
475
476 return sb.toString();
477 }
478
479 @Override
480 public String toInputSafe(String text) {
481 return StringUtil.replace(
482 text,
483 new String[] {"&", "\""},
484 new String[] {"&", """});
485 }
486
487 @Override
488 public String unescape(String text) {
489 if (text == null) {
490 return null;
491 }
492
493 if (text.length() == 0) {
494 return StringPool.BLANK;
495 }
496
497
498
499 text = StringUtil.replace(text, "<", "<");
500 text = StringUtil.replace(text, ">", ">");
501 text = StringUtil.replace(text, "&", "&");
502 text = StringUtil.replace(text, "’", "\u2019");
503 text = StringUtil.replace(text, """, "\"");
504 text = StringUtil.replace(text, "'", "'");
505 text = StringUtil.replace(text, "(", "(");
506 text = StringUtil.replace(text, ")", ")");
507 text = StringUtil.replace(text, ",", ",");
508 text = StringUtil.replace(text, "#", "#");
509 text = StringUtil.replace(text, "%", "%");
510 text = StringUtil.replace(text, ";", ";");
511 text = StringUtil.replace(text, "=", "=");
512 text = StringUtil.replace(text, "+", "+");
513 text = StringUtil.replace(text, "-", "-");
514
515 return text;
516 }
517
518 @Override
519 public String unescapeCDATA(String text) {
520 if (text == null) {
521 return null;
522 }
523
524 if (text.length() == 0) {
525 return StringPool.BLANK;
526 }
527
528 text = StringUtil.replace(text, "<![CDATA[", "<![CDATA[");
529 text = StringUtil.replace(text, "]]>", "]]>");
530
531 return text;
532 }
533
534 @Override
535 public String wordBreak(String text, int columns) {
536 StringBundler sb = new StringBundler();
537
538 int length = 0;
539 int lastWrite = 0;
540 int pos = 0;
541
542 Pattern pattern = Pattern.compile("([\\s<&]|$)");
543
544 Matcher matcher = pattern.matcher(text);
545
546 while (matcher.find()) {
547 if (matcher.start() < pos) {
548 continue;
549 }
550
551 while ((length + matcher.start() - pos) >= columns) {
552 pos += columns - length;
553
554 sb.append(text.substring(lastWrite, pos));
555 sb.append("<wbr/>­");
556
557 length = 0;
558 lastWrite = pos;
559 }
560
561 length += matcher.start() - pos;
562
563 String group = matcher.group();
564
565 if (group.equals(StringPool.AMPERSAND)) {
566 int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
567
568 if (x != -1) {
569 length++;
570 pos = x + 1;
571 }
572
573 continue;
574 }
575
576 if (group.equals(StringPool.LESS_THAN)) {
577 int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
578
579 if (x != -1) {
580 pos = x + 1;
581 }
582
583 continue;
584 }
585
586 if (group.equals(StringPool.SPACE) ||
587 group.equals(StringPool.NEW_LINE)) {
588
589 length = 0;
590 pos = matcher.start() + 1;
591 }
592 }
593
594 sb.append(text.substring(lastWrite));
595
596 return sb.toString();
597 }
598
599 protected boolean isTag(char[] tag, String text, int pos) {
600 if ((pos + tag.length + 1) <= text.length()) {
601 char item;
602
603 for (int i = 0; i < tag.length; i++) {
604 item = text.charAt(pos++);
605
606 if (Character.toLowerCase(item) != tag[i]) {
607 return false;
608 }
609 }
610
611 item = text.charAt(pos);
612
613
614
615 return !Character.isLetter(item);
616 }
617 else {
618 return false;
619 }
620 }
621
622 protected int stripTag(char[] tag, String text, int pos) {
623 int x = pos + _TAG_SCRIPT.length;
624
625
626
627 x = text.indexOf(">", x);
628
629 if (x < 0) {
630 return pos;
631 }
632
633
634
635 if (text.charAt(x-1) == '/') {
636 return pos;
637 }
638
639
640
641 while (true) {
642 x = text.indexOf("</", x);
643
644 if (x >= 0) {
645 if (isTag(tag, text, x + 2)) {
646 pos = x;
647
648 break;
649 }
650 else {
651
652
653
654 x += 2;
655 }
656 }
657 else {
658 break;
659 }
660 }
661
662 return pos;
663 }
664
665 private static final String[] _MS_WORD_HTML = new String[] {
666 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
667 };
668
669 private static final String[] _MS_WORD_UNICODE = new String[] {
670 "\u00ae", "\u2019", "\u201c", "\u201d"
671 };
672
673 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
674
675 private static final char[] _TAG_STYLE = {'s', 't', 'y', 'l', 'e'};
676
677
678
679 private static final char[] _XPATH_TOKENS = {
680 '(', ')', '[', ']', '.', '@', ',', ':', '/', '|', '+', '-', '=', '!',
681 '<', '>', '*', '$', '"', '"', ' ', 9, 10, 13, 133, 8232};
682
683 }