001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.security.pacl.DoPrivileged;
018 import com.liferay.portal.kernel.util.CharPool;
019 import com.liferay.portal.kernel.util.Html;
020 import com.liferay.portal.kernel.util.HttpUtil;
021 import com.liferay.portal.kernel.util.StringBundler;
022 import com.liferay.portal.kernel.util.StringPool;
023 import com.liferay.portal.kernel.util.StringUtil;
024 import com.liferay.portal.kernel.util.Validator;
025
026 import java.util.regex.Matcher;
027 import java.util.regex.Pattern;
028
029 import net.htmlparser.jericho.Renderer;
030 import net.htmlparser.jericho.Source;
031 import net.htmlparser.jericho.TextExtractor;
032
033
041 @DoPrivileged
042 public class HtmlImpl implements Html {
043
044 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
045
046 public static final int ESCAPE_MODE_CSS = 2;
047
048 public static final int ESCAPE_MODE_JS = 3;
049
050 public static final int ESCAPE_MODE_TEXT = 4;
051
052 public static final int ESCAPE_MODE_URL = 5;
053
054 @Override
055 public String escape(String text) {
056 if (text == null) {
057 return null;
058 }
059
060 if (text.length() == 0) {
061 return StringPool.BLANK;
062 }
063
064
065
066
067
068 StringBundler sb = null;
069
070 int lastReplacementIndex = 0;
071
072 for (int i = 0; i < text.length(); i++) {
073 char c = text.charAt(i);
074
075 String replacement = null;
076
077 switch (c) {
078 case '<':
079 replacement = "<";
080
081 break;
082
083 case '>':
084 replacement = ">";
085
086 break;
087
088 case '&':
089 replacement = "&";
090
091 break;
092
093 case '"':
094 replacement = """;
095
096 break;
097
098 case '\'':
099 replacement = "'";
100
101 break;
102
103 case '\u00bb':
104 replacement = "»";
105
106 break;
107
108 case '\u2013':
109 replacement = "–";
110
111 break;
112
113 case '\u2014':
114 replacement = "—";
115
116 break;
117 }
118
119 if (replacement != null) {
120 if (sb == null) {
121 sb = new StringBundler();
122 }
123
124 if (i > lastReplacementIndex) {
125 sb.append(text.substring(lastReplacementIndex, i));
126 }
127
128 sb.append(replacement);
129
130 lastReplacementIndex = i + 1;
131 }
132 }
133
134 if (sb == null) {
135 return text;
136 }
137 else {
138 if (lastReplacementIndex < text.length()) {
139 sb.append(text.substring(lastReplacementIndex));
140 }
141
142 return sb.toString();
143 }
144 }
145
146 @Override
147 public String escape(String text, int type) {
148 if (text == null) {
149 return null;
150 }
151
152 if (text.length() == 0) {
153 return StringPool.BLANK;
154 }
155
156 String prefix = StringPool.BLANK;
157 String postfix = StringPool.BLANK;
158
159 if (type == ESCAPE_MODE_ATTRIBUTE) {
160 prefix = "&#x";
161 postfix = StringPool.SEMICOLON;
162 }
163 else if (type == ESCAPE_MODE_CSS) {
164 prefix = StringPool.BACK_SLASH;
165 }
166 else if (type == ESCAPE_MODE_JS) {
167 prefix = "\\x";
168 }
169 else if (type == ESCAPE_MODE_URL) {
170 return HttpUtil.encodeURL(text, true);
171 }
172 else {
173 return escape(text);
174 }
175
176 StringBuilder sb = new StringBuilder();
177
178 for (int i = 0; i < text.length(); i++) {
179 char c = text.charAt(i);
180
181 if (Character.isLetterOrDigit(c) ||
182 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
183
184 sb.append(c);
185 }
186 else {
187 sb.append(prefix);
188
189 String hexString = StringUtil.toHexString(c);
190
191 if (hexString.length() == 1) {
192 sb.append(StringPool.ASCII_TABLE[48]);
193 }
194
195 sb.append(hexString);
196 sb.append(postfix);
197 }
198 }
199
200 if (sb.length() == text.length()) {
201 return text;
202 }
203 else {
204 return sb.toString();
205 }
206 }
207
208 @Override
209 public String escapeAttribute(String attribute) {
210 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
211 }
212
213 @Override
214 public String escapeCSS(String css) {
215 return escape(css, ESCAPE_MODE_CSS);
216 }
217
218 @Override
219 public String escapeHREF(String href) {
220 if (href == null) {
221 return null;
222 }
223
224 if (href.length() == 0) {
225 return StringPool.BLANK;
226 }
227
228 if (href.indexOf(StringPool.COLON) == 10) {
229 String protocol = href.substring(0, 10).toLowerCase();
230
231 if (protocol.equals("javascript")) {
232 href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
233 }
234 }
235
236 return escapeAttribute(href);
237 }
238
239 @Override
240 public String escapeJS(String js) {
241 return escape(js, ESCAPE_MODE_JS);
242 }
243
244 @Override
245 public String escapeURL(String url) {
246 return escape(url, ESCAPE_MODE_URL);
247 }
248
249 @Override
250 public String escapeXPath(String xPath) {
251 if (Validator.isNull(xPath)) {
252 return xPath;
253 }
254
255 StringBuilder sb = new StringBuilder(xPath.length());
256
257 for (int i = 0; i < xPath.length(); i++) {
258 char c = xPath.charAt(i);
259
260 boolean hasToken = false;
261
262 for (int j = 0; j < _XPATH_TOKENS.length; j++) {
263 if (c == _XPATH_TOKENS[j]) {
264 hasToken = true;
265
266 break;
267 }
268 }
269
270 if (hasToken) {
271 sb.append(StringPool.UNDERLINE);
272 }
273 else {
274 sb.append(c);
275 }
276 }
277
278 return sb.toString();
279 }
280
281 @Override
282 public String escapeXPathAttribute(String xPathAttribute) {
283 boolean hasApostrophe = xPathAttribute.contains(StringPool.APOSTROPHE);
284 boolean hasQuote = xPathAttribute.contains(StringPool.QUOTE);
285
286 if (hasQuote && hasApostrophe) {
287 String[] parts = xPathAttribute.split(StringPool.APOSTROPHE);
288
289 return "concat('".concat(
290 StringUtil.merge(parts, "', \"'\", '")).concat("')");
291 }
292
293 if (hasQuote) {
294 return StringPool.APOSTROPHE.concat(xPathAttribute).concat(
295 StringPool.APOSTROPHE);
296 }
297
298 return StringPool.QUOTE.concat(xPathAttribute).concat(StringPool.QUOTE);
299 }
300
301 @Override
302 public String extractText(String html) {
303 if (html == null) {
304 return null;
305 }
306
307 Source source = new Source(html);
308
309 TextExtractor textExtractor = source.getTextExtractor();
310
311 return textExtractor.toString();
312 }
313
314 @Override
315 public String fromInputSafe(String text) {
316 return StringUtil.replace(text, "&", "&");
317 }
318
319 @Override
320 public String render(String html) {
321 if (html == null) {
322 return null;
323 }
324
325 Source source = new Source(html);
326
327 Renderer renderer = source.getRenderer();
328
329 return renderer.toString();
330 }
331
332 @Override
333 public String replaceMsWordCharacters(String text) {
334 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
335 }
336
337 @Override
338 public String stripBetween(String text, String tag) {
339 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
340 }
341
342 @Override
343 public String stripComments(String text) {
344 return StringUtil.stripBetween(text, "<!--", "-->");
345 }
346
347 @Override
348 public String stripHtml(String text) {
349 if (text == null) {
350 return null;
351 }
352
353 text = stripComments(text);
354
355 StringBuilder sb = new StringBuilder(text.length());
356
357 int x = 0;
358 int y = text.indexOf("<");
359
360 while (y != -1) {
361 sb.append(text.substring(x, y));
362 sb.append(StringPool.SPACE);
363
364
365
366 if (isTag(_TAG_SCRIPT, text, y + 1)) {
367 y = stripTag(_TAG_SCRIPT, text, y);
368 }
369 else if (isTag(_TAG_STYLE, text, y + 1)) {
370 y = stripTag(_TAG_STYLE, text, y);
371 }
372
373 x = text.indexOf(">", y);
374
375 if (x == -1) {
376 break;
377 }
378
379 x++;
380
381 if (x < y) {
382
383
384
385 break;
386 }
387
388 y = text.indexOf("<", x);
389 }
390
391 if (y == -1) {
392 sb.append(text.substring(x));
393 }
394
395 return sb.toString();
396 }
397
398 @Override
399 public String toInputSafe(String text) {
400 return StringUtil.replace(
401 text,
402 new String[] {"&", "\""},
403 new String[] {"&", """});
404 }
405
406 @Override
407 public String unescape(String text) {
408 if (text == null) {
409 return null;
410 }
411
412 if (text.length() == 0) {
413 return StringPool.BLANK;
414 }
415
416
417
418 text = StringUtil.replace(text, "<", "<");
419 text = StringUtil.replace(text, ">", ">");
420 text = StringUtil.replace(text, "&", "&");
421 text = StringUtil.replace(text, """, "\"");
422 text = StringUtil.replace(text, "'", "'");
423 text = StringUtil.replace(text, "(", "(");
424 text = StringUtil.replace(text, ")", ")");
425 text = StringUtil.replace(text, ",", ",");
426 text = StringUtil.replace(text, "#", "#");
427 text = StringUtil.replace(text, "%", "%");
428 text = StringUtil.replace(text, ";", ";");
429 text = StringUtil.replace(text, "=", "=");
430 text = StringUtil.replace(text, "+", "+");
431 text = StringUtil.replace(text, "-", "-");
432
433 return text;
434 }
435
436 @Override
437 public String unescapeCDATA(String text) {
438 if (text == null) {
439 return null;
440 }
441
442 if (text.length() == 0) {
443 return StringPool.BLANK;
444 }
445
446 text = StringUtil.replace(text, "<![CDATA[", "<![CDATA[");
447 text = StringUtil.replace(text, "]]>", "]]>");
448
449 return text;
450 }
451
452 @Override
453 public String wordBreak(String text, int columns) {
454 StringBundler sb = new StringBundler();
455
456 int length = 0;
457 int lastWrite = 0;
458 int pos = 0;
459
460 Pattern pattern = Pattern.compile("([\\s<&]|$)");
461
462 Matcher matcher = pattern.matcher(text);
463
464 while (matcher.find()) {
465 if (matcher.start() < pos) {
466 continue;
467 }
468
469 while ((length + matcher.start() - pos) >= columns) {
470 pos += columns - length;
471
472 sb.append(text.substring(lastWrite, pos));
473 sb.append("<wbr/>­");
474
475 length = 0;
476 lastWrite = pos;
477 }
478
479 length += matcher.start() - pos;
480
481 String group = matcher.group();
482
483 if (group.equals(StringPool.AMPERSAND)) {
484 int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
485
486 if (x != -1) {
487 length++;
488 pos = x + 1;
489 }
490
491 continue;
492 }
493
494 if (group.equals(StringPool.LESS_THAN)) {
495 int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
496
497 if (x != -1) {
498 pos = x + 1;
499 }
500
501 continue;
502 }
503
504 if (group.equals(StringPool.SPACE) ||
505 group.equals(StringPool.NEW_LINE)) {
506
507 length = 0;
508 pos = matcher.start() + 1;
509 }
510 }
511
512 sb.append(text.substring(lastWrite));
513
514 return sb.toString();
515 }
516
517 protected boolean isTag(char[] tag, String text, int pos) {
518 if ((pos + tag.length + 1) <= text.length()) {
519 char item;
520
521 for (int i = 0; i < tag.length; i++) {
522 item = text.charAt(pos++);
523
524 if (Character.toLowerCase(item) != tag[i]) {
525 return false;
526 }
527 }
528
529 item = text.charAt(pos);
530
531
532
533 return !Character.isLetter(item);
534 }
535 else {
536 return false;
537 }
538 }
539
540 protected int stripTag(char[] tag, String text, int pos) {
541 int x = pos + _TAG_SCRIPT.length;
542
543
544
545 x = text.indexOf(">", x);
546
547 if (x < 0) {
548 return pos;
549 }
550
551
552
553 if (text.charAt(x-1) == '/') {
554 return pos;
555 }
556
557
558
559 for (;;) {
560 x = text.indexOf("</", x);
561
562 if (x >= 0) {
563 if (isTag(tag, text, x + 2)) {
564 pos = x;
565
566 break;
567 }
568 else {
569
570
571
572 x += 2;
573 }
574 }
575 else {
576 break;
577 }
578 }
579
580 return pos;
581 }
582
583 private static final String[] _MS_WORD_HTML = new String[] {
584 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
585 };
586
587 private static final String[] _MS_WORD_UNICODE = new String[] {
588 "\u00ae", "\u2019", "\u201c", "\u201d"
589 };
590
591 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
592
593 private static final char[] _TAG_STYLE = {'s', 't', 'y', 'l', 'e'};
594
595
596
597 private static final char[] _XPATH_TOKENS = {
598 '(', ')', '[', ']', '.', '@', ',', ':', '/', '|', '+', '-', '=', '!',
599 '<', '>', '*', '$', '"', '"', ' ', 9, 10, 13, 133, 8232};
600
601 }