001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.security.pacl.DoPrivileged;
018 import com.liferay.portal.kernel.util.CharPool;
019 import com.liferay.portal.kernel.util.Html;
020 import com.liferay.portal.kernel.util.HttpUtil;
021 import com.liferay.portal.kernel.util.StringBundler;
022 import com.liferay.portal.kernel.util.StringPool;
023 import com.liferay.portal.kernel.util.StringUtil;
024 import com.liferay.portal.kernel.util.Validator;
025
026 import java.util.regex.Matcher;
027 import java.util.regex.Pattern;
028
029 import net.htmlparser.jericho.Renderer;
030 import net.htmlparser.jericho.Source;
031 import net.htmlparser.jericho.TextExtractor;
032
033
041 @DoPrivileged
042 public class HtmlImpl implements Html {
043
044 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
045
046 public static final int ESCAPE_MODE_CSS = 2;
047
048 public static final int ESCAPE_MODE_JS = 3;
049
050 public static final int ESCAPE_MODE_TEXT = 4;
051
052 public static final int ESCAPE_MODE_URL = 5;
053
054
061 @Override
062 public String escape(String text) {
063 if (text == null) {
064 return null;
065 }
066
067 if (text.length() == 0) {
068 return StringPool.BLANK;
069 }
070
071
072
073
074
075 StringBundler sb = null;
076
077 int lastReplacementIndex = 0;
078
079 for (int i = 0; i < text.length(); i++) {
080 char c = text.charAt(i);
081
082 String replacement = null;
083
084 switch (c) {
085 case '<':
086 replacement = "<";
087
088 break;
089
090 case '>':
091 replacement = ">";
092
093 break;
094
095 case '&':
096 replacement = "&";
097
098 break;
099
100 case '"':
101 replacement = """;
102
103 break;
104
105 case '\'':
106 replacement = "'";
107
108 break;
109
110 case '\u00bb':
111 replacement = "»";
112
113 break;
114
115 case '\u2013':
116 replacement = "–";
117
118 break;
119
120 case '\u2014':
121 replacement = "—";
122
123 break;
124 }
125
126 if (replacement != null) {
127 if (sb == null) {
128 sb = new StringBundler();
129 }
130
131 if (i > lastReplacementIndex) {
132 sb.append(text.substring(lastReplacementIndex, i));
133 }
134
135 sb.append(replacement);
136
137 lastReplacementIndex = i + 1;
138 }
139 }
140
141 if (sb == null) {
142 return text;
143 }
144
145 if (lastReplacementIndex < text.length()) {
146 sb.append(text.substring(lastReplacementIndex));
147 }
148
149 return sb.toString();
150 }
151
152 @Override
153 public String escape(String text, int type) {
154 if (text == null) {
155 return null;
156 }
157
158 if (text.length() == 0) {
159 return StringPool.BLANK;
160 }
161
162 String prefix = StringPool.BLANK;
163 String postfix = StringPool.BLANK;
164
165 if (type == ESCAPE_MODE_ATTRIBUTE) {
166 prefix = "&#x";
167 postfix = StringPool.SEMICOLON;
168 }
169 else if (type == ESCAPE_MODE_CSS) {
170 prefix = StringPool.BACK_SLASH;
171 }
172 else if (type == ESCAPE_MODE_JS) {
173 prefix = "\\x";
174 }
175 else if (type == ESCAPE_MODE_URL) {
176 return HttpUtil.encodeURL(text, true);
177 }
178 else {
179 return escape(text);
180 }
181
182 StringBuilder sb = new StringBuilder();
183
184 for (int i = 0; i < text.length(); i++) {
185 char c = text.charAt(i);
186
187 if ((c > 255) || Character.isLetterOrDigit(c) ||
188 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
189
190 sb.append(c);
191 }
192 else {
193 sb.append(prefix);
194
195 String hexString = StringUtil.toHexString(c);
196
197 if (hexString.length() == 1) {
198 sb.append(StringPool.ASCII_TABLE[48]);
199 }
200
201 sb.append(hexString);
202 sb.append(postfix);
203 }
204 }
205
206 if (sb.length() == text.length()) {
207 return text;
208 }
209 else {
210 return sb.toString();
211 }
212 }
213
214 @Override
215 public String escapeAttribute(String attribute) {
216 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
217 }
218
219 @Override
220 public String escapeCSS(String css) {
221 return escape(css, ESCAPE_MODE_CSS);
222 }
223
224 @Override
225 public String escapeHREF(String href) {
226 if (href == null) {
227 return null;
228 }
229
230 if (href.length() == 0) {
231 return StringPool.BLANK;
232 }
233
234 int index = href.indexOf(StringPool.COLON);
235
236 if (index == 4) {
237 String protocol = StringUtil.toLowerCase(href.substring(0, 4));
238
239 if (protocol.equals("data")) {
240 href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
241 }
242 }
243 else if (index == 10) {
244 String protocol = StringUtil.toLowerCase(href.substring(0, 10));
245
246 if (protocol.equals("javascript")) {
247 href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
248 }
249 }
250
251 return escapeAttribute(href);
252 }
253
254 @Override
255 public String escapeJS(String js) {
256 return escape(js, ESCAPE_MODE_JS);
257 }
258
259 @Override
260 public String escapeURL(String url) {
261 return escape(url, ESCAPE_MODE_URL);
262 }
263
264 @Override
265 public String escapeXPath(String xPath) {
266 if (Validator.isNull(xPath)) {
267 return xPath;
268 }
269
270 StringBuilder sb = new StringBuilder(xPath.length());
271
272 for (int i = 0; i < xPath.length(); i++) {
273 char c = xPath.charAt(i);
274
275 boolean hasToken = false;
276
277 for (int j = 0; j < _XPATH_TOKENS.length; j++) {
278 if (c == _XPATH_TOKENS[j]) {
279 hasToken = true;
280
281 break;
282 }
283 }
284
285 if (hasToken) {
286 sb.append(StringPool.UNDERLINE);
287 }
288 else {
289 sb.append(c);
290 }
291 }
292
293 return sb.toString();
294 }
295
296 @Override
297 public String escapeXPathAttribute(String xPathAttribute) {
298 boolean hasApostrophe = xPathAttribute.contains(StringPool.APOSTROPHE);
299 boolean hasQuote = xPathAttribute.contains(StringPool.QUOTE);
300
301 if (hasQuote && hasApostrophe) {
302 String[] parts = xPathAttribute.split(StringPool.APOSTROPHE);
303
304 return "concat('".concat(
305 StringUtil.merge(parts, "', \"'\", '")).concat("')");
306 }
307
308 if (hasQuote) {
309 return StringPool.APOSTROPHE.concat(xPathAttribute).concat(
310 StringPool.APOSTROPHE);
311 }
312
313 return StringPool.QUOTE.concat(xPathAttribute).concat(StringPool.QUOTE);
314 }
315
316 @Override
317 public String extractText(String html) {
318 if (html == null) {
319 return null;
320 }
321
322 Source source = new Source(html);
323
324 TextExtractor textExtractor = source.getTextExtractor();
325
326 return textExtractor.toString();
327 }
328
329 @Override
330 public String fromInputSafe(String text) {
331 return StringUtil.replace(text, "&", "&");
332 }
333
334 @Override
335 public String getAUICompatibleId(String text) {
336 if (Validator.isNull(text)) {
337 return text;
338 }
339
340 StringBundler sb = null;
341
342 int lastReplacementIndex = 0;
343
344 for (int i = 0; i < text.length(); i++) {
345 char c = text.charAt(i);
346
347 if (((c <= 127) && (Validator.isChar(c) || Validator.isDigit(c))) ||
348 ((c > 127) && (c != CharPool.FIGURE_SPACE) &&
349 (c != CharPool.NARROW_NO_BREAK_SPACE) &&
350 (c != CharPool.NO_BREAK_SPACE))) {
351
352 continue;
353 }
354
355 if (sb == null) {
356 sb = new StringBundler();
357 }
358
359 if (i > lastReplacementIndex) {
360 sb.append(text.substring(lastReplacementIndex, i));
361 }
362
363 sb.append(CharPool.UNDERLINE);
364
365 if (c != CharPool.UNDERLINE) {
366 sb.append(StringUtil.toHexString(c));
367 }
368
369 sb.append(CharPool.UNDERLINE);
370
371 lastReplacementIndex = i + 1;
372 }
373
374 if (sb == null) {
375 return text;
376 }
377
378 if (lastReplacementIndex < text.length()) {
379 sb.append(text.substring(lastReplacementIndex));
380 }
381
382 return sb.toString();
383 }
384
385 @Deprecated
386 @Override
387 public String render(String html) {
388 if (html == null) {
389 return null;
390 }
391
392 Source source = new Source(html);
393
394 Renderer renderer = source.getRenderer();
395
396 return renderer.toString();
397 }
398
399 @Override
400 public String replaceMsWordCharacters(String text) {
401 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
402 }
403
404 @Override
405 public String replaceNewLine(String text) {
406 if (text == null) {
407 return null;
408 }
409
410 return text.replaceAll("\r?\n", "<br />");
411 }
412
413 @Override
414 public String stripBetween(String text, String tag) {
415 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
416 }
417
418 @Override
419 public String stripComments(String text) {
420 return StringUtil.stripBetween(text, "<!--", "-->");
421 }
422
423 @Override
424 public String stripHtml(String text) {
425 if (text == null) {
426 return null;
427 }
428
429 text = stripComments(text);
430
431 StringBuilder sb = new StringBuilder(text.length());
432
433 int x = 0;
434 int y = text.indexOf("<");
435
436 while (y != -1) {
437 sb.append(text.substring(x, y));
438 sb.append(StringPool.SPACE);
439
440
441
442 if (isTag(_TAG_SCRIPT, text, y + 1)) {
443 y = stripTag(_TAG_SCRIPT, text, y);
444 }
445 else if (isTag(_TAG_STYLE, text, y + 1)) {
446 y = stripTag(_TAG_STYLE, text, y);
447 }
448
449 x = text.indexOf(">", y);
450
451 if (x == -1) {
452 break;
453 }
454
455 x++;
456
457 if (x < y) {
458
459
460
461 break;
462 }
463
464 y = text.indexOf("<", x);
465 }
466
467 if (y == -1) {
468 sb.append(text.substring(x));
469 }
470
471 return sb.toString();
472 }
473
474 @Override
475 public String toInputSafe(String text) {
476 return StringUtil.replace(
477 text,
478 new String[] {"&", "\""},
479 new String[] {"&", """});
480 }
481
482 @Override
483 public String unescape(String text) {
484 if (text == null) {
485 return null;
486 }
487
488 if (text.length() == 0) {
489 return StringPool.BLANK;
490 }
491
492
493
494 text = StringUtil.replace(text, "<", "<");
495 text = StringUtil.replace(text, ">", ">");
496 text = StringUtil.replace(text, "&", "&");
497 text = StringUtil.replace(text, "’", "\u2019");
498 text = StringUtil.replace(text, """, "\"");
499 text = StringUtil.replace(text, "'", "'");
500 text = StringUtil.replace(text, "(", "(");
501 text = StringUtil.replace(text, ")", ")");
502 text = StringUtil.replace(text, ",", ",");
503 text = StringUtil.replace(text, "#", "#");
504 text = StringUtil.replace(text, "%", "%");
505 text = StringUtil.replace(text, ";", ";");
506 text = StringUtil.replace(text, "=", "=");
507 text = StringUtil.replace(text, "+", "+");
508 text = StringUtil.replace(text, "-", "-");
509
510 return text;
511 }
512
513 @Override
514 public String unescapeCDATA(String text) {
515 if (text == null) {
516 return null;
517 }
518
519 if (text.length() == 0) {
520 return StringPool.BLANK;
521 }
522
523 text = StringUtil.replace(text, "<![CDATA[", "<![CDATA[");
524 text = StringUtil.replace(text, "]]>", "]]>");
525
526 return text;
527 }
528
529 @Override
530 public String wordBreak(String text, int columns) {
531 StringBundler sb = new StringBundler();
532
533 int length = 0;
534 int lastWrite = 0;
535 int pos = 0;
536
537 Pattern pattern = Pattern.compile("([\\s<&]|$)");
538
539 Matcher matcher = pattern.matcher(text);
540
541 while (matcher.find()) {
542 if (matcher.start() < pos) {
543 continue;
544 }
545
546 while ((length + matcher.start() - pos) >= columns) {
547 pos += columns - length;
548
549 sb.append(text.substring(lastWrite, pos));
550 sb.append("<wbr/>­");
551
552 length = 0;
553 lastWrite = pos;
554 }
555
556 length += matcher.start() - pos;
557
558 String group = matcher.group();
559
560 if (group.equals(StringPool.AMPERSAND)) {
561 int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
562
563 if (x != -1) {
564 length++;
565 pos = x + 1;
566 }
567
568 continue;
569 }
570
571 if (group.equals(StringPool.LESS_THAN)) {
572 int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
573
574 if (x != -1) {
575 pos = x + 1;
576 }
577
578 continue;
579 }
580
581 if (group.equals(StringPool.SPACE) ||
582 group.equals(StringPool.NEW_LINE)) {
583
584 length = 0;
585 pos = matcher.start() + 1;
586 }
587 }
588
589 sb.append(text.substring(lastWrite));
590
591 return sb.toString();
592 }
593
594 protected boolean isTag(char[] tag, String text, int pos) {
595 if ((pos + tag.length + 1) <= text.length()) {
596 char item;
597
598 for (int i = 0; i < tag.length; i++) {
599 item = text.charAt(pos++);
600
601 if (Character.toLowerCase(item) != tag[i]) {
602 return false;
603 }
604 }
605
606 item = text.charAt(pos);
607
608
609
610 return !Character.isLetter(item);
611 }
612 else {
613 return false;
614 }
615 }
616
617 protected int stripTag(char[] tag, String text, int pos) {
618 int x = pos + _TAG_SCRIPT.length;
619
620
621
622 x = text.indexOf(">", x);
623
624 if (x < 0) {
625 return pos;
626 }
627
628
629
630 if (text.charAt(x-1) == '/') {
631 return pos;
632 }
633
634
635
636 while (true) {
637 x = text.indexOf("</", x);
638
639 if (x >= 0) {
640 if (isTag(tag, text, x + 2)) {
641 pos = x;
642
643 break;
644 }
645 else {
646
647
648
649 x += 2;
650 }
651 }
652 else {
653 break;
654 }
655 }
656
657 return pos;
658 }
659
660 private static final String[] _MS_WORD_HTML = new String[] {
661 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
662 };
663
664 private static final String[] _MS_WORD_UNICODE = new String[] {
665 "\u00ae", "\u2019", "\u201c", "\u201d"
666 };
667
668 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
669
670 private static final char[] _TAG_STYLE = {'s', 't', 'y', 'l', 'e'};
671
672
673
674 private static final char[] _XPATH_TOKENS = {
675 '(', ')', '[', ']', '.', '@', ',', ':', '/', '|', '+', '-', '=', '!',
676 '<', '>', '*', '$', '"', '"', ' ', 9, 10, 13, 133, 8232};
677
678 }