001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.security.pacl.DoPrivileged;
018 import com.liferay.portal.kernel.util.CharPool;
019 import com.liferay.portal.kernel.util.Html;
020 import com.liferay.portal.kernel.util.HttpUtil;
021 import com.liferay.portal.kernel.util.StringBundler;
022 import com.liferay.portal.kernel.util.StringPool;
023 import com.liferay.portal.kernel.util.StringUtil;
024 import com.liferay.portal.kernel.util.Validator;
025
026 import java.util.regex.Matcher;
027 import java.util.regex.Pattern;
028
029 import net.htmlparser.jericho.Renderer;
030 import net.htmlparser.jericho.Source;
031 import net.htmlparser.jericho.TextExtractor;
032
033
041 @DoPrivileged
042 public class HtmlImpl implements Html {
043
044 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
045
046 public static final int ESCAPE_MODE_CSS = 2;
047
048 public static final int ESCAPE_MODE_JS = 3;
049
050 public static final int ESCAPE_MODE_TEXT = 4;
051
052 public static final int ESCAPE_MODE_URL = 5;
053
054 @Override
055 public String escape(String text) {
056 if (text == null) {
057 return null;
058 }
059
060 if (text.length() == 0) {
061 return StringPool.BLANK;
062 }
063
064
065
066
067
068 StringBundler sb = null;
069
070 int lastReplacementIndex = 0;
071
072 for (int i = 0; i < text.length(); i++) {
073 char c = text.charAt(i);
074
075 String replacement = null;
076
077 switch (c) {
078 case '<':
079 replacement = "<";
080
081 break;
082
083 case '>':
084 replacement = ">";
085
086 break;
087
088 case '&':
089 replacement = "&";
090
091 break;
092
093 case '"':
094 replacement = """;
095
096 break;
097
098 case '\'':
099 replacement = "'";
100
101 break;
102
103 case '\u00bb':
104 replacement = "»";
105
106 break;
107
108 case '\u2013':
109 replacement = "–";
110
111 break;
112
113 case '\u2014':
114 replacement = "—";
115
116 break;
117 }
118
119 if (replacement != null) {
120 if (sb == null) {
121 sb = new StringBundler();
122 }
123
124 if (i > lastReplacementIndex) {
125 sb.append(text.substring(lastReplacementIndex, i));
126 }
127
128 sb.append(replacement);
129
130 lastReplacementIndex = i + 1;
131 }
132 }
133
134 if (sb == null) {
135 return text;
136 }
137
138 if (lastReplacementIndex < text.length()) {
139 sb.append(text.substring(lastReplacementIndex));
140 }
141
142 return sb.toString();
143 }
144
145 @Override
146 public String escape(String text, int type) {
147 if (text == null) {
148 return null;
149 }
150
151 if (text.length() == 0) {
152 return StringPool.BLANK;
153 }
154
155 String prefix = StringPool.BLANK;
156 String postfix = StringPool.BLANK;
157
158 if (type == ESCAPE_MODE_ATTRIBUTE) {
159 prefix = "&#x";
160 postfix = StringPool.SEMICOLON;
161 }
162 else if (type == ESCAPE_MODE_CSS) {
163 prefix = StringPool.BACK_SLASH;
164 }
165 else if (type == ESCAPE_MODE_JS) {
166 prefix = "\\x";
167 }
168 else if (type == ESCAPE_MODE_URL) {
169 return HttpUtil.encodeURL(text, true);
170 }
171 else {
172 return escape(text);
173 }
174
175 StringBuilder sb = new StringBuilder();
176
177 for (int i = 0; i < text.length(); i++) {
178 char c = text.charAt(i);
179
180 if ((c > 255) || Character.isLetterOrDigit(c) ||
181 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
182
183 sb.append(c);
184 }
185 else {
186 sb.append(prefix);
187
188 String hexString = StringUtil.toHexString(c);
189
190 if (hexString.length() == 1) {
191 sb.append(StringPool.ASCII_TABLE[48]);
192 }
193
194 sb.append(hexString);
195 sb.append(postfix);
196 }
197 }
198
199 if (sb.length() == text.length()) {
200 return text;
201 }
202 else {
203 return sb.toString();
204 }
205 }
206
207 @Override
208 public String escapeAttribute(String attribute) {
209 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
210 }
211
212 @Override
213 public String escapeCSS(String css) {
214 return escape(css, ESCAPE_MODE_CSS);
215 }
216
217 @Override
218 public String escapeHREF(String href) {
219 if (href == null) {
220 return null;
221 }
222
223 if (href.length() == 0) {
224 return StringPool.BLANK;
225 }
226
227 if (href.indexOf(StringPool.COLON) == 10) {
228 String protocol = href.substring(0, 10).toLowerCase();
229
230 if (protocol.equals("javascript")) {
231 href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
232 }
233 }
234
235 return escapeAttribute(href);
236 }
237
238 @Override
239 public String escapeJS(String js) {
240 return escape(js, ESCAPE_MODE_JS);
241 }
242
243 @Override
244 public String escapeURL(String url) {
245 return escape(url, ESCAPE_MODE_URL);
246 }
247
248 @Override
249 public String escapeXPath(String xPath) {
250 if (Validator.isNull(xPath)) {
251 return xPath;
252 }
253
254 StringBuilder sb = new StringBuilder(xPath.length());
255
256 for (int i = 0; i < xPath.length(); i++) {
257 char c = xPath.charAt(i);
258
259 boolean hasToken = false;
260
261 for (int j = 0; j < _XPATH_TOKENS.length; j++) {
262 if (c == _XPATH_TOKENS[j]) {
263 hasToken = true;
264
265 break;
266 }
267 }
268
269 if (hasToken) {
270 sb.append(StringPool.UNDERLINE);
271 }
272 else {
273 sb.append(c);
274 }
275 }
276
277 return sb.toString();
278 }
279
280 @Override
281 public String escapeXPathAttribute(String xPathAttribute) {
282 boolean hasApostrophe = xPathAttribute.contains(StringPool.APOSTROPHE);
283 boolean hasQuote = xPathAttribute.contains(StringPool.QUOTE);
284
285 if (hasQuote && hasApostrophe) {
286 String[] parts = xPathAttribute.split(StringPool.APOSTROPHE);
287
288 return "concat('".concat(
289 StringUtil.merge(parts, "', \"'\", '")).concat("')");
290 }
291
292 if (hasQuote) {
293 return StringPool.APOSTROPHE.concat(xPathAttribute).concat(
294 StringPool.APOSTROPHE);
295 }
296
297 return StringPool.QUOTE.concat(xPathAttribute).concat(StringPool.QUOTE);
298 }
299
300 @Override
301 public String extractText(String html) {
302 if (html == null) {
303 return null;
304 }
305
306 Source source = new Source(html);
307
308 TextExtractor textExtractor = source.getTextExtractor();
309
310 return textExtractor.toString();
311 }
312
313 @Override
314 public String fromInputSafe(String text) {
315 return StringUtil.replace(text, "&", "&");
316 }
317
318 @Override
319 public String render(String html) {
320 if (html == null) {
321 return null;
322 }
323
324 Source source = new Source(html);
325
326 Renderer renderer = source.getRenderer();
327
328 return renderer.toString();
329 }
330
331 @Override
332 public String replaceMsWordCharacters(String text) {
333 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
334 }
335
336 @Override
337 public String replaceNewLine(String text) {
338 if (text == null) {
339 return null;
340 }
341
342 return text.replaceAll("\r?\n", "<br />");
343 }
344
345 @Override
346 public String stripBetween(String text, String tag) {
347 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
348 }
349
350 @Override
351 public String stripComments(String text) {
352 return StringUtil.stripBetween(text, "<!--", "-->");
353 }
354
355 @Override
356 public String stripHtml(String text) {
357 if (text == null) {
358 return null;
359 }
360
361 text = stripComments(text);
362
363 StringBuilder sb = new StringBuilder(text.length());
364
365 int x = 0;
366 int y = text.indexOf("<");
367
368 while (y != -1) {
369 sb.append(text.substring(x, y));
370 sb.append(StringPool.SPACE);
371
372
373
374 if (isTag(_TAG_SCRIPT, text, y + 1)) {
375 y = stripTag(_TAG_SCRIPT, text, y);
376 }
377 else if (isTag(_TAG_STYLE, text, y + 1)) {
378 y = stripTag(_TAG_STYLE, text, y);
379 }
380
381 x = text.indexOf(">", y);
382
383 if (x == -1) {
384 break;
385 }
386
387 x++;
388
389 if (x < y) {
390
391
392
393 break;
394 }
395
396 y = text.indexOf("<", x);
397 }
398
399 if (y == -1) {
400 sb.append(text.substring(x));
401 }
402
403 return sb.toString();
404 }
405
406 @Override
407 public String toInputSafe(String text) {
408 return StringUtil.replace(
409 text,
410 new String[] {"&", "\""},
411 new String[] {"&", """});
412 }
413
414 @Override
415 public String unescape(String text) {
416 if (text == null) {
417 return null;
418 }
419
420 if (text.length() == 0) {
421 return StringPool.BLANK;
422 }
423
424
425
426 text = StringUtil.replace(text, "<", "<");
427 text = StringUtil.replace(text, ">", ">");
428 text = StringUtil.replace(text, "&", "&");
429 text = StringUtil.replace(text, """, "\"");
430 text = StringUtil.replace(text, "'", "'");
431 text = StringUtil.replace(text, "(", "(");
432 text = StringUtil.replace(text, ")", ")");
433 text = StringUtil.replace(text, ",", ",");
434 text = StringUtil.replace(text, "#", "#");
435 text = StringUtil.replace(text, "%", "%");
436 text = StringUtil.replace(text, ";", ";");
437 text = StringUtil.replace(text, "=", "=");
438 text = StringUtil.replace(text, "+", "+");
439 text = StringUtil.replace(text, "-", "-");
440
441 return text;
442 }
443
444 @Override
445 public String unescapeCDATA(String text) {
446 if (text == null) {
447 return null;
448 }
449
450 if (text.length() == 0) {
451 return StringPool.BLANK;
452 }
453
454 text = StringUtil.replace(text, "<![CDATA[", "<![CDATA[");
455 text = StringUtil.replace(text, "]]>", "]]>");
456
457 return text;
458 }
459
460 @Override
461 public String wordBreak(String text, int columns) {
462 StringBundler sb = new StringBundler();
463
464 int length = 0;
465 int lastWrite = 0;
466 int pos = 0;
467
468 Pattern pattern = Pattern.compile("([\\s<&]|$)");
469
470 Matcher matcher = pattern.matcher(text);
471
472 while (matcher.find()) {
473 if (matcher.start() < pos) {
474 continue;
475 }
476
477 while ((length + matcher.start() - pos) >= columns) {
478 pos += columns - length;
479
480 sb.append(text.substring(lastWrite, pos));
481 sb.append("<wbr/>­");
482
483 length = 0;
484 lastWrite = pos;
485 }
486
487 length += matcher.start() - pos;
488
489 String group = matcher.group();
490
491 if (group.equals(StringPool.AMPERSAND)) {
492 int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
493
494 if (x != -1) {
495 length++;
496 pos = x + 1;
497 }
498
499 continue;
500 }
501
502 if (group.equals(StringPool.LESS_THAN)) {
503 int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
504
505 if (x != -1) {
506 pos = x + 1;
507 }
508
509 continue;
510 }
511
512 if (group.equals(StringPool.SPACE) ||
513 group.equals(StringPool.NEW_LINE)) {
514
515 length = 0;
516 pos = matcher.start() + 1;
517 }
518 }
519
520 sb.append(text.substring(lastWrite));
521
522 return sb.toString();
523 }
524
525 protected boolean isTag(char[] tag, String text, int pos) {
526 if ((pos + tag.length + 1) <= text.length()) {
527 char item;
528
529 for (int i = 0; i < tag.length; i++) {
530 item = text.charAt(pos++);
531
532 if (Character.toLowerCase(item) != tag[i]) {
533 return false;
534 }
535 }
536
537 item = text.charAt(pos);
538
539
540
541 return !Character.isLetter(item);
542 }
543 else {
544 return false;
545 }
546 }
547
548 protected int stripTag(char[] tag, String text, int pos) {
549 int x = pos + _TAG_SCRIPT.length;
550
551
552
553 x = text.indexOf(">", x);
554
555 if (x < 0) {
556 return pos;
557 }
558
559
560
561 if (text.charAt(x-1) == '/') {
562 return pos;
563 }
564
565
566
567 while (true) {
568 x = text.indexOf("</", x);
569
570 if (x >= 0) {
571 if (isTag(tag, text, x + 2)) {
572 pos = x;
573
574 break;
575 }
576 else {
577
578
579
580 x += 2;
581 }
582 }
583 else {
584 break;
585 }
586 }
587
588 return pos;
589 }
590
591 private static final String[] _MS_WORD_HTML = new String[] {
592 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
593 };
594
595 private static final String[] _MS_WORD_UNICODE = new String[] {
596 "\u00ae", "\u2019", "\u201c", "\u201d"
597 };
598
599 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
600
601 private static final char[] _TAG_STYLE = {'s', 't', 'y', 'l', 'e'};
602
603
604
605 private static final char[] _XPATH_TOKENS = {
606 '(', ')', '[', ']', '.', '@', ',', ':', '/', '|', '+', '-', '=', '!',
607 '<', '>', '*', '$', '"', '"', ' ', 9, 10, 13, 133, 8232};
608
609 }