001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.util.CharPool;
018 import com.liferay.portal.kernel.util.Html;
019 import com.liferay.portal.kernel.util.HttpUtil;
020 import com.liferay.portal.kernel.util.StringBundler;
021 import com.liferay.portal.kernel.util.StringPool;
022 import com.liferay.portal.kernel.util.StringUtil;
023 import com.liferay.portal.kernel.util.Validator;
024
025 import java.util.regex.Matcher;
026 import java.util.regex.Pattern;
027
028 import net.htmlparser.jericho.Renderer;
029 import net.htmlparser.jericho.Source;
030 import net.htmlparser.jericho.TextExtractor;
031
032
040 public class HtmlImpl implements Html {
041
042 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
043
044 public static final int ESCAPE_MODE_CSS = 2;
045
046 public static final int ESCAPE_MODE_JS = 3;
047
048 public static final int ESCAPE_MODE_TEXT = 4;
049
050 public static final int ESCAPE_MODE_URL = 5;
051
052 public String escape(String text) {
053 if (text == null) {
054 return null;
055 }
056
057 if (text.length() == 0) {
058 return StringPool.BLANK;
059 }
060
061
062
063
064
065 StringBundler sb = null;
066
067 int lastReplacementIndex = 0;
068
069 for (int i = 0; i < text.length(); i++) {
070 char c = text.charAt(i);
071
072 String replacement = null;
073
074 switch (c) {
075 case '<':
076 replacement = "<";
077
078 break;
079
080 case '>':
081 replacement = ">";
082
083 break;
084
085 case '&':
086 replacement = "&";
087
088 break;
089
090 case '"':
091 replacement = """;
092
093 break;
094
095 case '\'':
096 replacement = "'";
097
098 break;
099
100 case '\u00bb':
101 replacement = "»";
102
103 break;
104
105 case '\u2013':
106 replacement = "–";
107
108 break;
109
110 case '\u2014':
111 replacement = "—";
112
113 break;
114 }
115
116 if (replacement != null) {
117 if (sb == null) {
118 sb = new StringBundler();
119 }
120
121 if (i > lastReplacementIndex) {
122 sb.append(text.substring(lastReplacementIndex, i));
123 }
124
125 sb.append(replacement);
126
127 lastReplacementIndex = i + 1;
128 }
129 }
130
131 if (sb == null) {
132 return text;
133 }
134 else {
135 if (lastReplacementIndex < text.length()) {
136 sb.append(text.substring(lastReplacementIndex));
137 }
138
139 return sb.toString();
140 }
141 }
142
143 public String escape(String text, int type) {
144 if (text == null) {
145 return null;
146 }
147
148 if (text.length() == 0) {
149 return StringPool.BLANK;
150 }
151
152 String prefix = StringPool.BLANK;
153 String postfix = StringPool.BLANK;
154
155 if (type == ESCAPE_MODE_ATTRIBUTE) {
156 prefix = "&#x";
157 postfix = StringPool.SEMICOLON;
158 }
159 else if (type == ESCAPE_MODE_CSS) {
160 prefix = StringPool.BACK_SLASH;
161 }
162 else if (type == ESCAPE_MODE_JS) {
163 prefix = "\\x";
164 }
165 else if (type == ESCAPE_MODE_URL) {
166 return HttpUtil.encodeURL(text, true);
167 }
168 else {
169 return escape(text);
170 }
171
172 StringBuilder sb = new StringBuilder();
173
174 for (int i = 0; i < text.length(); i++) {
175 char c = text.charAt(i);
176
177 if (Character.isLetterOrDigit(c) ||
178 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
179
180 sb.append(c);
181 }
182 else {
183 sb.append(prefix);
184
185 String hexString = StringUtil.toHexString(c);
186
187 if (hexString.length() == 1) {
188 sb.append(StringPool.ASCII_TABLE[48]);
189 }
190
191 sb.append(hexString);
192 sb.append(postfix);
193 }
194 }
195
196 if (sb.length() == text.length()) {
197 return text;
198 }
199 else {
200 return sb.toString();
201 }
202 }
203
204 public String escapeAttribute(String attribute) {
205 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
206 }
207
208 public String escapeCSS(String css) {
209 return escape(css, ESCAPE_MODE_CSS);
210 }
211
212 public String escapeHREF(String href) {
213 if (href == null) {
214 return null;
215 }
216
217 if (href.length() == 0) {
218 return StringPool.BLANK;
219 }
220
221 if (href.indexOf(StringPool.COLON) == 10) {
222 String protocol = href.substring(0, 10).toLowerCase();
223
224 if (protocol.equals("javascript")) {
225 return StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
226 }
227 }
228
229 return href;
230 }
231
232 public String escapeJS(String js) {
233 return escape(js, ESCAPE_MODE_JS);
234 }
235
236 public String escapeURL(String url) {
237 return escape(url, ESCAPE_MODE_URL);
238 }
239
240 public String escapeXPath(String xPath) {
241 if (Validator.isNull(xPath)) {
242 return xPath;
243 }
244
245 StringBuilder sb = new StringBuilder(xPath.length());
246
247 for (int i = 0; i < xPath.length(); i++) {
248 char c = xPath.charAt(i);
249
250 boolean hasToken = false;
251
252 for (int j = 0; j < _XPATH_TOKENS.length; j++) {
253 if (c == _XPATH_TOKENS[j]) {
254 hasToken = true;
255
256 break;
257 }
258 }
259
260 if (hasToken) {
261 sb.append(StringPool.UNDERLINE);
262 }
263 else {
264 sb.append(c);
265 }
266 }
267
268 return sb.toString();
269 }
270
271 public String escapeXPathAttribute(String xPathAttribute) {
272 boolean hasApostrophe = xPathAttribute.contains(StringPool.APOSTROPHE);
273 boolean hasQuote = xPathAttribute.contains(StringPool.QUOTE);
274
275 if (hasQuote && hasApostrophe) {
276 String[] parts = xPathAttribute.split(StringPool.APOSTROPHE);
277
278 return "concat('".concat(
279 StringUtil.merge(parts, "', \"'\", '")).concat("')");
280 }
281
282 if (hasQuote) {
283 return StringPool.APOSTROPHE.concat(xPathAttribute).concat(
284 StringPool.APOSTROPHE);
285 }
286
287 return StringPool.QUOTE.concat(xPathAttribute).concat(StringPool.QUOTE);
288 }
289
290 public String extractText(String html) {
291 if (html == null) {
292 return null;
293 }
294
295 Source source = new Source(html);
296
297 TextExtractor textExtractor = source.getTextExtractor();
298
299 return textExtractor.toString();
300 }
301
302 public String fromInputSafe(String text) {
303 return StringUtil.replace(text, "&", "&");
304 }
305
306 public String render(String html) {
307 if (html == null) {
308 return null;
309 }
310
311 Source source = new Source(html);
312
313 Renderer renderer = source.getRenderer();
314
315 return renderer.toString();
316 }
317
318 public String replaceMsWordCharacters(String text) {
319 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
320 }
321
322 public String stripBetween(String text, String tag) {
323 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
324 }
325
326 public String stripComments(String text) {
327 return StringUtil.stripBetween(text, "<!--", "-->");
328 }
329
330 public String stripHtml(String text) {
331 if (text == null) {
332 return null;
333 }
334
335 text = stripComments(text);
336
337 StringBuilder sb = new StringBuilder(text.length());
338
339 int x = 0;
340 int y = text.indexOf("<");
341
342 while (y != -1) {
343 sb.append(text.substring(x, y));
344 sb.append(StringPool.SPACE);
345
346
347
348 boolean scriptFound = isScriptTag(text, y + 1);
349
350 if (scriptFound) {
351 int pos = y + _TAG_SCRIPT.length;
352
353
354
355 pos = text.indexOf(">", pos);
356
357 if (pos >= 0) {
358
359
360
361
362 if (text.charAt(pos-1) != '/') {
363
364
365
366 for (;;) {
367 pos = text.indexOf("</", pos);
368
369 if (pos >= 0) {
370 if (isScriptTag(text, pos + 2)) {
371 y = pos;
372
373 break;
374 }
375 else {
376
377
378
379 pos += 2;
380 }
381 }
382 else {
383 break;
384 }
385 }
386 }
387 }
388 }
389
390 x = text.indexOf(">", y);
391
392 if (x == -1) {
393 break;
394 }
395
396 x++;
397
398 if (x < y) {
399
400
401
402 break;
403 }
404
405 y = text.indexOf("<", x);
406 }
407
408 if (y == -1) {
409 sb.append(text.substring(x));
410 }
411
412 return sb.toString();
413 }
414
415 public String toInputSafe(String text) {
416 return StringUtil.replace(
417 text,
418 new String[] {"&", "\""},
419 new String[] {"&", """});
420 }
421
422 public String unescape(String text) {
423 if (text == null) {
424 return null;
425 }
426
427 if (text.length() == 0) {
428 return StringPool.BLANK;
429 }
430
431
432
433 text = StringUtil.replace(text, "<", "<");
434 text = StringUtil.replace(text, ">", ">");
435 text = StringUtil.replace(text, "&", "&");
436 text = StringUtil.replace(text, """, "\"");
437 text = StringUtil.replace(text, "'", "'");
438 text = StringUtil.replace(text, "(", "(");
439 text = StringUtil.replace(text, ")", ")");
440 text = StringUtil.replace(text, ",", ",");
441 text = StringUtil.replace(text, "#", "#");
442 text = StringUtil.replace(text, "%", "%");
443 text = StringUtil.replace(text, ";", ";");
444 text = StringUtil.replace(text, "=", "=");
445 text = StringUtil.replace(text, "+", "+");
446 text = StringUtil.replace(text, "-", "-");
447
448 return text;
449 }
450
451 public String unescapeCDATA(String text) {
452 if (text == null) {
453 return null;
454 }
455
456 if (text.length() == 0) {
457 return StringPool.BLANK;
458 }
459
460 text = StringUtil.replace(text, "<![CDATA[", "<![CDATA[");
461 text = StringUtil.replace(text, "]]>", "]]>");
462
463 return text;
464 }
465
466 public String wordBreak(String text, int columns) {
467 StringBundler sb = new StringBundler();
468
469 int length = 0;
470 int lastWrite = 0;
471 int pos = 0;
472
473 Pattern pattern = Pattern.compile("([\\s<&]|$)");
474
475 Matcher matcher = pattern.matcher(text);
476
477 while (matcher.find()) {
478 if (matcher.start() < pos) {
479 continue;
480 }
481
482 while ((length + matcher.start() - pos) >= columns) {
483 pos += columns - length;
484
485 sb.append(text.substring(lastWrite, pos));
486 sb.append("<wbr/>­");
487
488 length = 0;
489 lastWrite = pos;
490 }
491
492 length += matcher.start() - pos;
493
494 String group = matcher.group();
495
496 if (group.equals(StringPool.AMPERSAND)) {
497 int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
498
499 if (x != -1) {
500 length++;
501 pos = x + 1;
502 }
503
504 continue;
505 }
506
507 if (group.equals(StringPool.LESS_THAN)) {
508 int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
509
510 if (x != -1) {
511 pos = x + 1;
512 }
513
514 continue;
515 }
516
517 if (group.equals(StringPool.SPACE) ||
518 group.equals(StringPool.NEW_LINE)) {
519
520 length = 0;
521 pos = matcher.start() + 1;
522 }
523 }
524
525 sb.append(text.substring(lastWrite));
526
527 return sb.toString();
528 }
529
530 protected boolean isScriptTag(String text, int pos) {
531 if ((pos + _TAG_SCRIPT.length + 1) <= text.length()) {
532 char item;
533
534 for (int i = 0; i < _TAG_SCRIPT.length; i++) {
535 item = text.charAt(pos++);
536
537 if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
538 return false;
539 }
540 }
541
542 item = text.charAt(pos);
543
544
545
546 return !Character.isLetter(item);
547 }
548 else {
549 return false;
550 }
551 }
552
553 private static final String[] _MS_WORD_HTML = new String[] {
554 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
555 };
556
557 private static final String[] _MS_WORD_UNICODE = new String[] {
558 "\u00ae", "\u2019", "\u201c", "\u201d"
559 };
560
561 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
562
563
564
565 private static final char[] _XPATH_TOKENS = {
566 '(', ')', '[', ']', '.', '@', ',', ':', '/', '|', '+', '-', '=', '!',
567 '<', '>', '*', '$', '"', '"', ' ', 9, 10, 13, 133, 8232};
568
569 }