001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.security.pacl.DoPrivileged;
018 import com.liferay.portal.kernel.util.CharPool;
019 import com.liferay.portal.kernel.util.Html;
020 import com.liferay.portal.kernel.util.HttpUtil;
021 import com.liferay.portal.kernel.util.StringBundler;
022 import com.liferay.portal.kernel.util.StringPool;
023 import com.liferay.portal.kernel.util.StringUtil;
024 import com.liferay.portal.kernel.util.Validator;
025
026 import java.util.regex.Matcher;
027 import java.util.regex.Pattern;
028
029 import net.htmlparser.jericho.Renderer;
030 import net.htmlparser.jericho.Source;
031 import net.htmlparser.jericho.TextExtractor;
032
033
041 @DoPrivileged
042 public class HtmlImpl implements Html {
043
044 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
045
046 public static final int ESCAPE_MODE_CSS = 2;
047
048 public static final int ESCAPE_MODE_JS = 3;
049
050 public static final int ESCAPE_MODE_TEXT = 4;
051
052 public static final int ESCAPE_MODE_URL = 5;
053
054 public String escape(String text) {
055 if (text == null) {
056 return null;
057 }
058
059 if (text.length() == 0) {
060 return StringPool.BLANK;
061 }
062
063
064
065
066
067 StringBundler sb = null;
068
069 int lastReplacementIndex = 0;
070
071 for (int i = 0; i < text.length(); i++) {
072 char c = text.charAt(i);
073
074 String replacement = null;
075
076 switch (c) {
077 case '<':
078 replacement = "<";
079
080 break;
081
082 case '>':
083 replacement = ">";
084
085 break;
086
087 case '&':
088 replacement = "&";
089
090 break;
091
092 case '"':
093 replacement = """;
094
095 break;
096
097 case '\'':
098 replacement = "'";
099
100 break;
101
102 case '\u00bb':
103 replacement = "»";
104
105 break;
106
107 case '\u2013':
108 replacement = "–";
109
110 break;
111
112 case '\u2014':
113 replacement = "—";
114
115 break;
116 }
117
118 if (replacement != null) {
119 if (sb == null) {
120 sb = new StringBundler();
121 }
122
123 if (i > lastReplacementIndex) {
124 sb.append(text.substring(lastReplacementIndex, i));
125 }
126
127 sb.append(replacement);
128
129 lastReplacementIndex = i + 1;
130 }
131 }
132
133 if (sb == null) {
134 return text;
135 }
136 else {
137 if (lastReplacementIndex < text.length()) {
138 sb.append(text.substring(lastReplacementIndex));
139 }
140
141 return sb.toString();
142 }
143 }
144
145 public String escape(String text, int type) {
146 if (text == null) {
147 return null;
148 }
149
150 if (text.length() == 0) {
151 return StringPool.BLANK;
152 }
153
154 String prefix = StringPool.BLANK;
155 String postfix = StringPool.BLANK;
156
157 if (type == ESCAPE_MODE_ATTRIBUTE) {
158 prefix = "&#x";
159 postfix = StringPool.SEMICOLON;
160 }
161 else if (type == ESCAPE_MODE_CSS) {
162 prefix = StringPool.BACK_SLASH;
163 }
164 else if (type == ESCAPE_MODE_JS) {
165 prefix = "\\x";
166 }
167 else if (type == ESCAPE_MODE_URL) {
168 return HttpUtil.encodeURL(text, true);
169 }
170 else {
171 return escape(text);
172 }
173
174 StringBuilder sb = new StringBuilder();
175
176 for (int i = 0; i < text.length(); i++) {
177 char c = text.charAt(i);
178
179 if (Character.isLetterOrDigit(c) ||
180 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
181
182 sb.append(c);
183 }
184 else {
185 sb.append(prefix);
186
187 String hexString = StringUtil.toHexString(c);
188
189 if (hexString.length() == 1) {
190 sb.append(StringPool.ASCII_TABLE[48]);
191 }
192
193 sb.append(hexString);
194 sb.append(postfix);
195 }
196 }
197
198 if (sb.length() == text.length()) {
199 return text;
200 }
201 else {
202 return sb.toString();
203 }
204 }
205
206 public String escapeAttribute(String attribute) {
207 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
208 }
209
210 public String escapeCSS(String css) {
211 return escape(css, ESCAPE_MODE_CSS);
212 }
213
214 public String escapeHREF(String href) {
215 if (href == null) {
216 return null;
217 }
218
219 if (href.length() == 0) {
220 return StringPool.BLANK;
221 }
222
223 if (href.indexOf(StringPool.COLON) == 10) {
224 String protocol = href.substring(0, 10).toLowerCase();
225
226 if (protocol.equals("javascript")) {
227 href = StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
228 }
229 }
230
231 return escapeAttribute(href);
232 }
233
234 public String escapeJS(String js) {
235 return escape(js, ESCAPE_MODE_JS);
236 }
237
238 public String escapeURL(String url) {
239 return escape(url, ESCAPE_MODE_URL);
240 }
241
242 public String escapeXPath(String xPath) {
243 if (Validator.isNull(xPath)) {
244 return xPath;
245 }
246
247 StringBuilder sb = new StringBuilder(xPath.length());
248
249 for (int i = 0; i < xPath.length(); i++) {
250 char c = xPath.charAt(i);
251
252 boolean hasToken = false;
253
254 for (int j = 0; j < _XPATH_TOKENS.length; j++) {
255 if (c == _XPATH_TOKENS[j]) {
256 hasToken = true;
257
258 break;
259 }
260 }
261
262 if (hasToken) {
263 sb.append(StringPool.UNDERLINE);
264 }
265 else {
266 sb.append(c);
267 }
268 }
269
270 return sb.toString();
271 }
272
273 public String escapeXPathAttribute(String xPathAttribute) {
274 boolean hasApostrophe = xPathAttribute.contains(StringPool.APOSTROPHE);
275 boolean hasQuote = xPathAttribute.contains(StringPool.QUOTE);
276
277 if (hasQuote && hasApostrophe) {
278 String[] parts = xPathAttribute.split(StringPool.APOSTROPHE);
279
280 return "concat('".concat(
281 StringUtil.merge(parts, "', \"'\", '")).concat("')");
282 }
283
284 if (hasQuote) {
285 return StringPool.APOSTROPHE.concat(xPathAttribute).concat(
286 StringPool.APOSTROPHE);
287 }
288
289 return StringPool.QUOTE.concat(xPathAttribute).concat(StringPool.QUOTE);
290 }
291
292 public String extractText(String html) {
293 if (html == null) {
294 return null;
295 }
296
297 Source source = new Source(html);
298
299 TextExtractor textExtractor = source.getTextExtractor();
300
301 return textExtractor.toString();
302 }
303
304 public String fromInputSafe(String text) {
305 return StringUtil.replace(text, "&", "&");
306 }
307
308 public String render(String html) {
309 if (html == null) {
310 return null;
311 }
312
313 Source source = new Source(html);
314
315 Renderer renderer = source.getRenderer();
316
317 return renderer.toString();
318 }
319
320 public String replaceMsWordCharacters(String text) {
321 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
322 }
323
324 public String stripBetween(String text, String tag) {
325 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
326 }
327
328 public String stripComments(String text) {
329 return StringUtil.stripBetween(text, "<!--", "-->");
330 }
331
332 public String stripHtml(String text) {
333 if (text == null) {
334 return null;
335 }
336
337 text = stripComments(text);
338
339 StringBuilder sb = new StringBuilder(text.length());
340
341 int x = 0;
342 int y = text.indexOf("<");
343
344 while (y != -1) {
345 sb.append(text.substring(x, y));
346 sb.append(StringPool.SPACE);
347
348
349
350 if (isTag(_TAG_SCRIPT, text, y + 1)) {
351 y = stripTag(_TAG_SCRIPT, text, y);
352 }
353 else if (isTag(_TAG_STYLE, text, y + 1)) {
354 y = stripTag(_TAG_STYLE, text, y);
355 }
356
357 x = text.indexOf(">", y);
358
359 if (x == -1) {
360 break;
361 }
362
363 x++;
364
365 if (x < y) {
366
367
368
369 break;
370 }
371
372 y = text.indexOf("<", x);
373 }
374
375 if (y == -1) {
376 sb.append(text.substring(x));
377 }
378
379 return sb.toString();
380 }
381
382 public String toInputSafe(String text) {
383 return StringUtil.replace(
384 text,
385 new String[] {"&", "\""},
386 new String[] {"&", """});
387 }
388
389 public String unescape(String text) {
390 if (text == null) {
391 return null;
392 }
393
394 if (text.length() == 0) {
395 return StringPool.BLANK;
396 }
397
398
399
400 text = StringUtil.replace(text, "<", "<");
401 text = StringUtil.replace(text, ">", ">");
402 text = StringUtil.replace(text, "&", "&");
403 text = StringUtil.replace(text, """, "\"");
404 text = StringUtil.replace(text, "'", "'");
405 text = StringUtil.replace(text, "(", "(");
406 text = StringUtil.replace(text, ")", ")");
407 text = StringUtil.replace(text, ",", ",");
408 text = StringUtil.replace(text, "#", "#");
409 text = StringUtil.replace(text, "%", "%");
410 text = StringUtil.replace(text, ";", ";");
411 text = StringUtil.replace(text, "=", "=");
412 text = StringUtil.replace(text, "+", "+");
413 text = StringUtil.replace(text, "-", "-");
414
415 return text;
416 }
417
418 public String unescapeCDATA(String text) {
419 if (text == null) {
420 return null;
421 }
422
423 if (text.length() == 0) {
424 return StringPool.BLANK;
425 }
426
427 text = StringUtil.replace(text, "<![CDATA[", "<![CDATA[");
428 text = StringUtil.replace(text, "]]>", "]]>");
429
430 return text;
431 }
432
433 public String wordBreak(String text, int columns) {
434 StringBundler sb = new StringBundler();
435
436 int length = 0;
437 int lastWrite = 0;
438 int pos = 0;
439
440 Pattern pattern = Pattern.compile("([\\s<&]|$)");
441
442 Matcher matcher = pattern.matcher(text);
443
444 while (matcher.find()) {
445 if (matcher.start() < pos) {
446 continue;
447 }
448
449 while ((length + matcher.start() - pos) >= columns) {
450 pos += columns - length;
451
452 sb.append(text.substring(lastWrite, pos));
453 sb.append("<wbr/>­");
454
455 length = 0;
456 lastWrite = pos;
457 }
458
459 length += matcher.start() - pos;
460
461 String group = matcher.group();
462
463 if (group.equals(StringPool.AMPERSAND)) {
464 int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
465
466 if (x != -1) {
467 length++;
468 pos = x + 1;
469 }
470
471 continue;
472 }
473
474 if (group.equals(StringPool.LESS_THAN)) {
475 int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
476
477 if (x != -1) {
478 pos = x + 1;
479 }
480
481 continue;
482 }
483
484 if (group.equals(StringPool.SPACE) ||
485 group.equals(StringPool.NEW_LINE)) {
486
487 length = 0;
488 pos = matcher.start() + 1;
489 }
490 }
491
492 sb.append(text.substring(lastWrite));
493
494 return sb.toString();
495 }
496
497 protected boolean isTag(char[] tag, String text, int pos) {
498 if ((pos + tag.length + 1) <= text.length()) {
499 char item;
500
501 for (int i = 0; i < tag.length; i++) {
502 item = text.charAt(pos++);
503
504 if (Character.toLowerCase(item) != tag[i]) {
505 return false;
506 }
507 }
508
509 item = text.charAt(pos);
510
511
512
513 return !Character.isLetter(item);
514 }
515 else {
516 return false;
517 }
518 }
519
520 protected int stripTag(char[] tag, String text, int pos) {
521 int x = pos + _TAG_SCRIPT.length;
522
523
524
525 x = text.indexOf(">", x);
526
527 if (x >= 0) {
528
529
530
531
532 if (text.charAt(x-1) != '/') {
533
534
535
536 for (;;) {
537 x = text.indexOf("</", x);
538
539 if (x >= 0) {
540 if (isTag(tag, text, x + 2)) {
541 pos = x;
542
543 break;
544 }
545 else {
546
547
548
549 x += 2;
550 }
551 }
552 else {
553 break;
554 }
555 }
556 }
557 }
558
559 return pos;
560 }
561
562 private static final String[] _MS_WORD_HTML = new String[] {
563 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
564 };
565
566 private static final String[] _MS_WORD_UNICODE = new String[] {
567 "\u00ae", "\u2019", "\u201c", "\u201d"
568 };
569
570 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
571
572 private static final char[] _TAG_STYLE = {'s', 't', 'y', 'l', 'e'};
573
574
575
576 private static final char[] _XPATH_TOKENS = {
577 '(', ')', '[', ']', '.', '@', ',', ':', '/', '|', '+', '-', '=', '!',
578 '<', '>', '*', '$', '"', '"', ' ', 9, 10, 13, 133, 8232};
579
580 }