001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.security.pacl.DoPrivileged;
018 import com.liferay.portal.kernel.util.CharPool;
019 import com.liferay.portal.kernel.util.Html;
020 import com.liferay.portal.kernel.util.HttpUtil;
021 import com.liferay.portal.kernel.util.StringBundler;
022 import com.liferay.portal.kernel.util.StringPool;
023 import com.liferay.portal.kernel.util.StringUtil;
024 import com.liferay.portal.kernel.util.Validator;
025
026 import java.util.regex.Matcher;
027 import java.util.regex.Pattern;
028
029 import net.htmlparser.jericho.Renderer;
030 import net.htmlparser.jericho.Source;
031 import net.htmlparser.jericho.TextExtractor;
032
033
041 @DoPrivileged
042 public class HtmlImpl implements Html {
043
/**
 * Mode for {@link #escape(String, int)}: escaped characters are written as
 * hexadecimal character references (prefix "&#x", postfix ";").
 */
044 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
045
/**
 * Mode for {@link #escape(String, int)}: escaped characters are written as a
 * backslash followed by their hexadecimal code.
 */
046 public static final int ESCAPE_MODE_CSS = 2;
047
/**
 * Mode for {@link #escape(String, int)}: escaped characters are written as
 * a backslash, the letter x and their hexadecimal code.
 */
048 public static final int ESCAPE_MODE_JS = 3;
049
/**
 * Mode for {@link #escape(String, int)}: has no dedicated branch there, so
 * it ends up in the fallback that delegates to {@link #escape(String)}.
 */
050 public static final int ESCAPE_MODE_TEXT = 4;
051
/**
 * Mode for {@link #escape(String, int)}: the text is passed to
 * HttpUtil.encodeURL(text, true).
 */
052 public static final int ESCAPE_MODE_URL = 5;
053
054
061 @Override
062 public String escape(String text) {
063 if (text == null) {
064 return null;
065 }
066
067 if (text.length() == 0) {
068 return StringPool.BLANK;
069 }
070
071
072
073
074
075 StringBundler sb = null;
076
077 int lastReplacementIndex = 0;
078
079 for (int i = 0; i < text.length(); i++) {
080 char c = text.charAt(i);
081
082 String replacement = null;
083
084 switch (c) {
085 case '<':
086 replacement = "<";
087
088 break;
089
090 case '>':
091 replacement = ">";
092
093 break;
094
095 case '&':
096 replacement = "&";
097
098 break;
099
100 case '"':
101 replacement = """;
102
103 break;
104
105 case '\'':
106 replacement = "'";
107
108 break;
109
110 case '\u00bb':
111 replacement = "»";
112
113 break;
114
115 case '\u2013':
116 replacement = "–";
117
118 break;
119
120 case '\u2014':
121 replacement = "—";
122
123 break;
124 }
125
126 if (replacement != null) {
127 if (sb == null) {
128 sb = new StringBundler();
129 }
130
131 if (i > lastReplacementIndex) {
132 sb.append(text.substring(lastReplacementIndex, i));
133 }
134
135 sb.append(replacement);
136
137 lastReplacementIndex = i + 1;
138 }
139 }
140
141 if (sb == null) {
142 return text;
143 }
144
145 if (lastReplacementIndex < text.length()) {
146 sb.append(text.substring(lastReplacementIndex));
147 }
148
149 return sb.toString();
150 }
151
152 @Override
153 public String escape(String text, int type) {
154 if (text == null) {
155 return null;
156 }
157
158 if (text.length() == 0) {
159 return StringPool.BLANK;
160 }
161
162 String prefix = StringPool.BLANK;
163 String postfix = StringPool.BLANK;
164
165 if (type == ESCAPE_MODE_ATTRIBUTE) {
166 prefix = "&#x";
167 postfix = StringPool.SEMICOLON;
168 }
169 else if (type == ESCAPE_MODE_CSS) {
170 prefix = StringPool.BACK_SLASH;
171 }
172 else if (type == ESCAPE_MODE_JS) {
173 prefix = "\\x";
174 }
175 else if (type == ESCAPE_MODE_URL) {
176 return HttpUtil.encodeURL(text, true);
177 }
178 else {
179 return escape(text);
180 }
181
182 StringBuilder sb = new StringBuilder();
183
184 for (int i = 0; i < text.length(); i++) {
185 char c = text.charAt(i);
186
187 if ((c > 255) || Character.isLetterOrDigit(c) ||
188 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
189
190 sb.append(c);
191 }
192 else {
193 sb.append(prefix);
194
195 String hexString = StringUtil.toHexString(c);
196
197 if (hexString.length() == 1) {
198 sb.append(StringPool.ASCII_TABLE[48]);
199 }
200
201 sb.append(hexString);
202 sb.append(postfix);
203 }
204 }
205
206 if (sb.length() == text.length()) {
207 return text;
208 }
209 else {
210 return sb.toString();
211 }
212 }
213
214 @Override
215 public String escapeAttribute(String attribute) {
216 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
217 }
218
219 @Override
220 public String escapeCSS(String css) {
221 return escape(css, ESCAPE_MODE_CSS);
222 }
223
224 @Override
225 public String escapeHREF(String href) {
226 if (href == null) {
227 return null;
228 }
229
230 if (href.length() == 0) {
231 return StringPool.BLANK;
232 }
233
234 if (href.indexOf(StringPool.COLON) == 10) {
235 String protocol = StringUtil.toLowerCase(href.substring(0, 10));
236
237 if (protocol.equals("javascript")) {
238 href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
239 }
240 }
241
242 return escapeAttribute(href);
243 }
244
245 @Override
246 public String escapeJS(String js) {
247 return escape(js, ESCAPE_MODE_JS);
248 }
249
250 @Override
251 public String escapeURL(String url) {
252 return escape(url, ESCAPE_MODE_URL);
253 }
254
255 @Override
256 public String escapeXPath(String xPath) {
257 if (Validator.isNull(xPath)) {
258 return xPath;
259 }
260
261 StringBuilder sb = new StringBuilder(xPath.length());
262
263 for (int i = 0; i < xPath.length(); i++) {
264 char c = xPath.charAt(i);
265
266 boolean hasToken = false;
267
268 for (int j = 0; j < _XPATH_TOKENS.length; j++) {
269 if (c == _XPATH_TOKENS[j]) {
270 hasToken = true;
271
272 break;
273 }
274 }
275
276 if (hasToken) {
277 sb.append(StringPool.UNDERLINE);
278 }
279 else {
280 sb.append(c);
281 }
282 }
283
284 return sb.toString();
285 }
286
287 @Override
288 public String escapeXPathAttribute(String xPathAttribute) {
289 boolean hasApostrophe = xPathAttribute.contains(StringPool.APOSTROPHE);
290 boolean hasQuote = xPathAttribute.contains(StringPool.QUOTE);
291
292 if (hasQuote && hasApostrophe) {
293 String[] parts = xPathAttribute.split(StringPool.APOSTROPHE);
294
295 return "concat('".concat(
296 StringUtil.merge(parts, "', \"'\", '")).concat("')");
297 }
298
299 if (hasQuote) {
300 return StringPool.APOSTROPHE.concat(xPathAttribute).concat(
301 StringPool.APOSTROPHE);
302 }
303
304 return StringPool.QUOTE.concat(xPathAttribute).concat(StringPool.QUOTE);
305 }
306
307 @Override
308 public String extractText(String html) {
309 if (html == null) {
310 return null;
311 }
312
313 Source source = new Source(html);
314
315 TextExtractor textExtractor = source.getTextExtractor();
316
317 return textExtractor.toString();
318 }
319
320 @Override
321 public String fromInputSafe(String text) {
322 return StringUtil.replace(text, "&", "&");
323 }
324
325 @Override
326 public String getAUICompatibleId(String text) {
327 if (Validator.isNull(text)) {
328 return text;
329 }
330
331 StringBundler sb = null;
332
333 int lastReplacementIndex = 0;
334
335 for (int i = 0; i < text.length(); i++) {
336 char c = text.charAt(i);
337
338 if (((c <= 127) && (Validator.isChar(c) || Validator.isDigit(c))) ||
339 ((c > 127) && (c != CharPool.FIGURE_SPACE) &&
340 (c != CharPool.NARROW_NO_BREAK_SPACE) &&
341 (c != CharPool.NO_BREAK_SPACE))) {
342
343 continue;
344 }
345
346 if (sb == null) {
347 sb = new StringBundler();
348 }
349
350 if (i > lastReplacementIndex) {
351 sb.append(text.substring(lastReplacementIndex, i));
352 }
353
354 sb.append(CharPool.UNDERLINE);
355
356 if (c != CharPool.UNDERLINE) {
357 sb.append(StringUtil.toHexString(c));
358 }
359
360 sb.append(CharPool.UNDERLINE);
361
362 lastReplacementIndex = i + 1;
363 }
364
365 if (sb == null) {
366 return text;
367 }
368
369 if (lastReplacementIndex < text.length()) {
370 sb.append(text.substring(lastReplacementIndex));
371 }
372
373 return sb.toString();
374 }
375
376 @Deprecated
377 @Override
378 public String render(String html) {
379 if (html == null) {
380 return null;
381 }
382
383 Source source = new Source(html);
384
385 Renderer renderer = source.getRenderer();
386
387 return renderer.toString();
388 }
389
390 @Override
391 public String replaceMsWordCharacters(String text) {
392 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
393 }
394
395 @Override
396 public String replaceNewLine(String text) {
397 if (text == null) {
398 return null;
399 }
400
401 return text.replaceAll("\r?\n", "<br />");
402 }
403
404 @Override
405 public String stripBetween(String text, String tag) {
406 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
407 }
408
409 @Override
410 public String stripComments(String text) {
411 return StringUtil.stripBetween(text, "<!--", "-->");
412 }
413
414 @Override
415 public String stripHtml(String text) {
416 if (text == null) {
417 return null;
418 }
419
420 text = stripComments(text);
421
422 StringBuilder sb = new StringBuilder(text.length());
423
424 int x = 0;
425 int y = text.indexOf("<");
426
427 while (y != -1) {
428 sb.append(text.substring(x, y));
429 sb.append(StringPool.SPACE);
430
431
432
433 if (isTag(_TAG_SCRIPT, text, y + 1)) {
434 y = stripTag(_TAG_SCRIPT, text, y);
435 }
436 else if (isTag(_TAG_STYLE, text, y + 1)) {
437 y = stripTag(_TAG_STYLE, text, y);
438 }
439
440 x = text.indexOf(">", y);
441
442 if (x == -1) {
443 break;
444 }
445
446 x++;
447
448 if (x < y) {
449
450
451
452 break;
453 }
454
455 y = text.indexOf("<", x);
456 }
457
458 if (y == -1) {
459 sb.append(text.substring(x));
460 }
461
462 return sb.toString();
463 }
464
465 @Override
466 public String toInputSafe(String text) {
467 return StringUtil.replace(
468 text,
469 new String[] {"&", "\""},
470 new String[] {"&", """});
471 }
472
473 @Override
474 public String unescape(String text) {
475 if (text == null) {
476 return null;
477 }
478
479 if (text.length() == 0) {
480 return StringPool.BLANK;
481 }
482
483
484
485 text = StringUtil.replace(text, "<", "<");
486 text = StringUtil.replace(text, ">", ">");
487 text = StringUtil.replace(text, "&", "&");
488 text = StringUtil.replace(text, "’", "\u2019");
489 text = StringUtil.replace(text, """, "\"");
490 text = StringUtil.replace(text, "'", "'");
491 text = StringUtil.replace(text, "(", "(");
492 text = StringUtil.replace(text, ")", ")");
493 text = StringUtil.replace(text, ",", ",");
494 text = StringUtil.replace(text, "#", "#");
495 text = StringUtil.replace(text, "%", "%");
496 text = StringUtil.replace(text, ";", ";");
497 text = StringUtil.replace(text, "=", "=");
498 text = StringUtil.replace(text, "+", "+");
499 text = StringUtil.replace(text, "-", "-");
500
501 return text;
502 }
503
504 @Override
505 public String unescapeCDATA(String text) {
506 if (text == null) {
507 return null;
508 }
509
510 if (text.length() == 0) {
511 return StringPool.BLANK;
512 }
513
514 text = StringUtil.replace(text, "<![CDATA[", "<![CDATA[");
515 text = StringUtil.replace(text, "]]>", "]]>");
516
517 return text;
518 }
519
520 @Override
521 public String wordBreak(String text, int columns) {
522 StringBundler sb = new StringBundler();
523
524 int length = 0;
525 int lastWrite = 0;
526 int pos = 0;
527
528 Pattern pattern = Pattern.compile("([\\s<&]|$)");
529
530 Matcher matcher = pattern.matcher(text);
531
532 while (matcher.find()) {
533 if (matcher.start() < pos) {
534 continue;
535 }
536
537 while ((length + matcher.start() - pos) >= columns) {
538 pos += columns - length;
539
540 sb.append(text.substring(lastWrite, pos));
541 sb.append("<wbr/>­");
542
543 length = 0;
544 lastWrite = pos;
545 }
546
547 length += matcher.start() - pos;
548
549 String group = matcher.group();
550
551 if (group.equals(StringPool.AMPERSAND)) {
552 int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
553
554 if (x != -1) {
555 length++;
556 pos = x + 1;
557 }
558
559 continue;
560 }
561
562 if (group.equals(StringPool.LESS_THAN)) {
563 int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
564
565 if (x != -1) {
566 pos = x + 1;
567 }
568
569 continue;
570 }
571
572 if (group.equals(StringPool.SPACE) ||
573 group.equals(StringPool.NEW_LINE)) {
574
575 length = 0;
576 pos = matcher.start() + 1;
577 }
578 }
579
580 sb.append(text.substring(lastWrite));
581
582 return sb.toString();
583 }
584
585 protected boolean isTag(char[] tag, String text, int pos) {
586 if ((pos + tag.length + 1) <= text.length()) {
587 char item;
588
589 for (int i = 0; i < tag.length; i++) {
590 item = text.charAt(pos++);
591
592 if (Character.toLowerCase(item) != tag[i]) {
593 return false;
594 }
595 }
596
597 item = text.charAt(pos);
598
599
600
601 return !Character.isLetter(item);
602 }
603 else {
604 return false;
605 }
606 }
607
608 protected int stripTag(char[] tag, String text, int pos) {
609 int x = pos + _TAG_SCRIPT.length;
610
611
612
613 x = text.indexOf(">", x);
614
615 if (x < 0) {
616 return pos;
617 }
618
619
620
621 if (text.charAt(x-1) == '/') {
622 return pos;
623 }
624
625
626
627 while (true) {
628 x = text.indexOf("</", x);
629
630 if (x >= 0) {
631 if (isTag(tag, text, x + 2)) {
632 pos = x;
633
634 break;
635 }
636 else {
637
638
639
640 x += 2;
641 }
642 }
643 else {
644 break;
645 }
646 }
647
648 return pos;
649 }
650
651 private static final String[] _MS_WORD_HTML = new String[] {
652 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
653 };
654
655 private static final String[] _MS_WORD_UNICODE = new String[] {
656 "\u00ae", "\u2019", "\u201c", "\u201d"
657 };
658
659 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
660
661 private static final char[] _TAG_STYLE = {'s', 't', 'y', 'l', 'e'};
662
663
664
665 private static final char[] _XPATH_TOKENS = {
666 '(', ')', '[', ']', '.', '@', ',', ':', '/', '|', '+', '-', '=', '!',
667 '<', '>', '*', '$', '"', '"', ' ', 9, 10, 13, 133, 8232};
668
669 }