001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.security.pacl.DoPrivileged;
018 import com.liferay.portal.kernel.util.CharPool;
019 import com.liferay.portal.kernel.util.Html;
020 import com.liferay.portal.kernel.util.HttpUtil;
021 import com.liferay.portal.kernel.util.StringBundler;
022 import com.liferay.portal.kernel.util.StringPool;
023 import com.liferay.portal.kernel.util.StringUtil;
024 import com.liferay.portal.kernel.util.Validator;
025
026 import java.util.regex.Matcher;
027 import java.util.regex.Pattern;
028
029 import net.htmlparser.jericho.Renderer;
030 import net.htmlparser.jericho.Source;
031 import net.htmlparser.jericho.TextExtractor;
032
033
041 @DoPrivileged
042 public class HtmlImpl implements Html {
043
044 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
045
046 public static final int ESCAPE_MODE_CSS = 2;
047
048 public static final int ESCAPE_MODE_JS = 3;
049
050 public static final int ESCAPE_MODE_TEXT = 4;
051
052 public static final int ESCAPE_MODE_URL = 5;
053
054 @Override
055 public String escape(String text) {
056 if (text == null) {
057 return null;
058 }
059
060 if (text.length() == 0) {
061 return StringPool.BLANK;
062 }
063
064
065
066
067
068 StringBundler sb = null;
069
070 int lastReplacementIndex = 0;
071
072 for (int i = 0; i < text.length(); i++) {
073 char c = text.charAt(i);
074
075 String replacement = null;
076
077 switch (c) {
078 case '<':
079 replacement = "<";
080
081 break;
082
083 case '>':
084 replacement = ">";
085
086 break;
087
088 case '&':
089 replacement = "&";
090
091 break;
092
093 case '"':
094 replacement = """;
095
096 break;
097
098 case '\'':
099 replacement = "'";
100
101 break;
102
103 case '\u00bb':
104 replacement = "»";
105
106 break;
107
108 case '\u2013':
109 replacement = "–";
110
111 break;
112
113 case '\u2014':
114 replacement = "—";
115
116 break;
117 }
118
119 if (replacement != null) {
120 if (sb == null) {
121 sb = new StringBundler();
122 }
123
124 if (i > lastReplacementIndex) {
125 sb.append(text.substring(lastReplacementIndex, i));
126 }
127
128 sb.append(replacement);
129
130 lastReplacementIndex = i + 1;
131 }
132 }
133
134 if (sb == null) {
135 return text;
136 }
137 else {
138 if (lastReplacementIndex < text.length()) {
139 sb.append(text.substring(lastReplacementIndex));
140 }
141
142 return sb.toString();
143 }
144 }
145
146 @Override
147 public String escape(String text, int type) {
148 if (text == null) {
149 return null;
150 }
151
152 if (text.length() == 0) {
153 return StringPool.BLANK;
154 }
155
156 String prefix = StringPool.BLANK;
157 String postfix = StringPool.BLANK;
158
159 if (type == ESCAPE_MODE_ATTRIBUTE) {
160 prefix = "&#x";
161 postfix = StringPool.SEMICOLON;
162 }
163 else if (type == ESCAPE_MODE_CSS) {
164 prefix = StringPool.BACK_SLASH;
165 }
166 else if (type == ESCAPE_MODE_JS) {
167 prefix = "\\x";
168 }
169 else if (type == ESCAPE_MODE_URL) {
170 return HttpUtil.encodeURL(text, true);
171 }
172 else {
173 return escape(text);
174 }
175
176 StringBuilder sb = new StringBuilder();
177
178 for (int i = 0; i < text.length(); i++) {
179 char c = text.charAt(i);
180
181 if ((c > 255) || Character.isLetterOrDigit(c) ||
182 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
183
184 sb.append(c);
185 }
186 else {
187 sb.append(prefix);
188
189 String hexString = StringUtil.toHexString(c);
190
191 if (hexString.length() == 1) {
192 sb.append(StringPool.ASCII_TABLE[48]);
193 }
194
195 sb.append(hexString);
196 sb.append(postfix);
197 }
198 }
199
200 if (sb.length() == text.length()) {
201 return text;
202 }
203 else {
204 return sb.toString();
205 }
206 }
207
208 @Override
209 public String escapeAttribute(String attribute) {
210 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
211 }
212
213 @Override
214 public String escapeCSS(String css) {
215 return escape(css, ESCAPE_MODE_CSS);
216 }
217
218 @Override
219 public String escapeHREF(String href) {
220 if (href == null) {
221 return null;
222 }
223
224 if (href.length() == 0) {
225 return StringPool.BLANK;
226 }
227
228 if (href.indexOf(StringPool.COLON) == 10) {
229 String protocol = href.substring(0, 10).toLowerCase();
230
231 if (protocol.equals("javascript")) {
232 href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
233 }
234 }
235
236 return escapeAttribute(href);
237 }
238
239 @Override
240 public String escapeJS(String js) {
241 return escape(js, ESCAPE_MODE_JS);
242 }
243
244 @Override
245 public String escapeURL(String url) {
246 return escape(url, ESCAPE_MODE_URL);
247 }
248
249 @Override
250 public String escapeXPath(String xPath) {
251 if (Validator.isNull(xPath)) {
252 return xPath;
253 }
254
255 StringBuilder sb = new StringBuilder(xPath.length());
256
257 for (int i = 0; i < xPath.length(); i++) {
258 char c = xPath.charAt(i);
259
260 boolean hasToken = false;
261
262 for (int j = 0; j < _XPATH_TOKENS.length; j++) {
263 if (c == _XPATH_TOKENS[j]) {
264 hasToken = true;
265
266 break;
267 }
268 }
269
270 if (hasToken) {
271 sb.append(StringPool.UNDERLINE);
272 }
273 else {
274 sb.append(c);
275 }
276 }
277
278 return sb.toString();
279 }
280
281 @Override
282 public String escapeXPathAttribute(String xPathAttribute) {
283 boolean hasApostrophe = xPathAttribute.contains(StringPool.APOSTROPHE);
284 boolean hasQuote = xPathAttribute.contains(StringPool.QUOTE);
285
286 if (hasQuote && hasApostrophe) {
287 String[] parts = xPathAttribute.split(StringPool.APOSTROPHE);
288
289 return "concat('".concat(
290 StringUtil.merge(parts, "', \"'\", '")).concat("')");
291 }
292
293 if (hasQuote) {
294 return StringPool.APOSTROPHE.concat(xPathAttribute).concat(
295 StringPool.APOSTROPHE);
296 }
297
298 return StringPool.QUOTE.concat(xPathAttribute).concat(StringPool.QUOTE);
299 }
300
301 @Override
302 public String extractText(String html) {
303 if (html == null) {
304 return null;
305 }
306
307 Source source = new Source(html);
308
309 TextExtractor textExtractor = source.getTextExtractor();
310
311 return textExtractor.toString();
312 }
313
314 @Override
315 public String fromInputSafe(String text) {
316 return StringUtil.replace(text, "&", "&");
317 }
318
319 @Override
320 public String render(String html) {
321 if (html == null) {
322 return null;
323 }
324
325 Source source = new Source(html);
326
327 Renderer renderer = source.getRenderer();
328
329 return renderer.toString();
330 }
331
332 @Override
333 public String replaceMsWordCharacters(String text) {
334 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
335 }
336
337 @Override
338 public String replaceNewLine(String text) {
339 if (text == null) {
340 return null;
341 }
342
343 return text.replaceAll("\r?\n", "<br />");
344 }
345
346 @Override
347 public String stripBetween(String text, String tag) {
348 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
349 }
350
351 @Override
352 public String stripComments(String text) {
353 return StringUtil.stripBetween(text, "<!--", "-->");
354 }
355
356 @Override
357 public String stripHtml(String text) {
358 if (text == null) {
359 return null;
360 }
361
362 text = stripComments(text);
363
364 StringBuilder sb = new StringBuilder(text.length());
365
366 int x = 0;
367 int y = text.indexOf("<");
368
369 while (y != -1) {
370 sb.append(text.substring(x, y));
371 sb.append(StringPool.SPACE);
372
373
374
375 if (isTag(_TAG_SCRIPT, text, y + 1)) {
376 y = stripTag(_TAG_SCRIPT, text, y);
377 }
378 else if (isTag(_TAG_STYLE, text, y + 1)) {
379 y = stripTag(_TAG_STYLE, text, y);
380 }
381
382 x = text.indexOf(">", y);
383
384 if (x == -1) {
385 break;
386 }
387
388 x++;
389
390 if (x < y) {
391
392
393
394 break;
395 }
396
397 y = text.indexOf("<", x);
398 }
399
400 if (y == -1) {
401 sb.append(text.substring(x));
402 }
403
404 return sb.toString();
405 }
406
407 @Override
408 public String toInputSafe(String text) {
409 return StringUtil.replace(
410 text,
411 new String[] {"&", "\""},
412 new String[] {"&", """});
413 }
414
415 @Override
416 public String unescape(String text) {
417 if (text == null) {
418 return null;
419 }
420
421 if (text.length() == 0) {
422 return StringPool.BLANK;
423 }
424
425
426
427 text = StringUtil.replace(text, "<", "<");
428 text = StringUtil.replace(text, ">", ">");
429 text = StringUtil.replace(text, "&", "&");
430 text = StringUtil.replace(text, """, "\"");
431 text = StringUtil.replace(text, "'", "'");
432 text = StringUtil.replace(text, "(", "(");
433 text = StringUtil.replace(text, ")", ")");
434 text = StringUtil.replace(text, ",", ",");
435 text = StringUtil.replace(text, "#", "#");
436 text = StringUtil.replace(text, "%", "%");
437 text = StringUtil.replace(text, ";", ";");
438 text = StringUtil.replace(text, "=", "=");
439 text = StringUtil.replace(text, "+", "+");
440 text = StringUtil.replace(text, "-", "-");
441
442 return text;
443 }
444
445 @Override
446 public String unescapeCDATA(String text) {
447 if (text == null) {
448 return null;
449 }
450
451 if (text.length() == 0) {
452 return StringPool.BLANK;
453 }
454
455 text = StringUtil.replace(text, "<![CDATA[", "<![CDATA[");
456 text = StringUtil.replace(text, "]]>", "]]>");
457
458 return text;
459 }
460
461 @Override
462 public String wordBreak(String text, int columns) {
463 StringBundler sb = new StringBundler();
464
465 int length = 0;
466 int lastWrite = 0;
467 int pos = 0;
468
469 Pattern pattern = Pattern.compile("([\\s<&]|$)");
470
471 Matcher matcher = pattern.matcher(text);
472
473 while (matcher.find()) {
474 if (matcher.start() < pos) {
475 continue;
476 }
477
478 while ((length + matcher.start() - pos) >= columns) {
479 pos += columns - length;
480
481 sb.append(text.substring(lastWrite, pos));
482 sb.append("<wbr/>­");
483
484 length = 0;
485 lastWrite = pos;
486 }
487
488 length += matcher.start() - pos;
489
490 String group = matcher.group();
491
492 if (group.equals(StringPool.AMPERSAND)) {
493 int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
494
495 if (x != -1) {
496 length++;
497 pos = x + 1;
498 }
499
500 continue;
501 }
502
503 if (group.equals(StringPool.LESS_THAN)) {
504 int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
505
506 if (x != -1) {
507 pos = x + 1;
508 }
509
510 continue;
511 }
512
513 if (group.equals(StringPool.SPACE) ||
514 group.equals(StringPool.NEW_LINE)) {
515
516 length = 0;
517 pos = matcher.start() + 1;
518 }
519 }
520
521 sb.append(text.substring(lastWrite));
522
523 return sb.toString();
524 }
525
526 protected boolean isTag(char[] tag, String text, int pos) {
527 if ((pos + tag.length + 1) <= text.length()) {
528 char item;
529
530 for (int i = 0; i < tag.length; i++) {
531 item = text.charAt(pos++);
532
533 if (Character.toLowerCase(item) != tag[i]) {
534 return false;
535 }
536 }
537
538 item = text.charAt(pos);
539
540
541
542 return !Character.isLetter(item);
543 }
544 else {
545 return false;
546 }
547 }
548
549 protected int stripTag(char[] tag, String text, int pos) {
550 int x = pos + _TAG_SCRIPT.length;
551
552
553
554 x = text.indexOf(">", x);
555
556 if (x < 0) {
557 return pos;
558 }
559
560
561
562 if (text.charAt(x-1) == '/') {
563 return pos;
564 }
565
566
567
568 for (;;) {
569 x = text.indexOf("</", x);
570
571 if (x >= 0) {
572 if (isTag(tag, text, x + 2)) {
573 pos = x;
574
575 break;
576 }
577 else {
578
579
580
581 x += 2;
582 }
583 }
584 else {
585 break;
586 }
587 }
588
589 return pos;
590 }
591
592 private static final String[] _MS_WORD_HTML = new String[] {
593 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
594 };
595
596 private static final String[] _MS_WORD_UNICODE = new String[] {
597 "\u00ae", "\u2019", "\u201c", "\u201d"
598 };
599
600 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
601
602 private static final char[] _TAG_STYLE = {'s', 't', 'y', 'l', 'e'};
603
604
605
606 private static final char[] _XPATH_TOKENS = {
607 '(', ')', '[', ']', '.', '@', ',', ':', '/', '|', '+', '-', '=', '!',
608 '<', '>', '*', '$', '"', '"', ' ', 9, 10, 13, 133, 8232};
609
610 }