001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.util.CharPool;
018 import com.liferay.portal.kernel.util.Html;
019 import com.liferay.portal.kernel.util.HttpUtil;
020 import com.liferay.portal.kernel.util.StringBundler;
021 import com.liferay.portal.kernel.util.StringPool;
022 import com.liferay.portal.kernel.util.StringUtil;
023 import com.liferay.portal.kernel.util.Validator;
024
025 import java.util.regex.Matcher;
026 import java.util.regex.Pattern;
027
028 import net.htmlparser.jericho.Source;
029 import net.htmlparser.jericho.TextExtractor;
030
031
039 public class HtmlImpl implements Html {
040
041 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
042
043 public static final int ESCAPE_MODE_CSS = 2;
044
045 public static final int ESCAPE_MODE_JS = 3;
046
047 public static final int ESCAPE_MODE_TEXT = 4;
048
049 public static final int ESCAPE_MODE_URL = 5;
050
051 public String escape(String text) {
052 if (text == null) {
053 return null;
054 }
055
056 if (text.length() == 0) {
057 return StringPool.BLANK;
058 }
059
060
061
062
063
064 StringBundler sb = null;
065
066 int lastReplacementIndex = 0;
067
068 for (int i = 0; i < text.length(); i++) {
069 char c = text.charAt(i);
070
071 String replacement = null;
072
073 switch (c) {
074 case '<':
075 replacement = "<";
076
077 break;
078
079 case '>':
080 replacement = ">";
081
082 break;
083
084 case '&':
085 replacement = "&";
086
087 break;
088
089 case '"':
090 replacement = """;
091
092 break;
093
094 case '\'':
095 replacement = "'";
096
097 break;
098
099 case '\u00bb':
100 replacement = "»";
101
102 break;
103
104 case '\u2013':
105 replacement = "–";
106
107 break;
108
109 case '\u2014':
110 replacement = "—";
111
112 break;
113 }
114
115 if (replacement != null) {
116 if (sb == null) {
117 sb = new StringBundler();
118 }
119
120 if (i > lastReplacementIndex) {
121 sb.append(text.substring(lastReplacementIndex, i));
122 }
123
124 sb.append(replacement);
125
126 lastReplacementIndex = i + 1;
127 }
128 }
129
130 if (sb == null) {
131 return text;
132 }
133 else {
134 if (lastReplacementIndex < text.length()) {
135 sb.append(text.substring(lastReplacementIndex));
136 }
137
138 return sb.toString();
139 }
140 }
141
142 public String escape(String text, int type) {
143 if (text == null) {
144 return null;
145 }
146
147 if (text.length() == 0) {
148 return StringPool.BLANK;
149 }
150
151 String prefix = StringPool.BLANK;
152 String postfix = StringPool.BLANK;
153
154 if (type == ESCAPE_MODE_ATTRIBUTE) {
155 prefix = "&#x";
156 postfix = StringPool.SEMICOLON;
157 }
158 else if (type == ESCAPE_MODE_CSS) {
159 prefix = StringPool.BACK_SLASH;
160 }
161 else if (type == ESCAPE_MODE_JS) {
162 prefix = "\\x";
163 }
164 else if (type == ESCAPE_MODE_URL) {
165 return HttpUtil.encodeURL(text, true);
166 }
167 else {
168 return escape(text);
169 }
170
171 StringBuilder sb = new StringBuilder();
172
173 for (int i = 0; i < text.length(); i++) {
174 char c = text.charAt(i);
175
176 if (Character.isLetterOrDigit(c) ||
177 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
178
179 sb.append(c);
180 }
181 else {
182 sb.append(prefix);
183
184 String hexString = StringUtil.toHexString(c);
185
186 if (hexString.length() == 1) {
187 sb.append(StringPool.ASCII_TABLE[48]);
188 }
189
190 sb.append(hexString);
191 sb.append(postfix);
192 }
193 }
194
195 if (sb.length() == text.length()) {
196 return text;
197 }
198 else {
199 return sb.toString();
200 }
201 }
202
203 public String escapeAttribute(String attribute) {
204 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
205 }
206
207 public String escapeCSS(String css) {
208 return escape(css, ESCAPE_MODE_CSS);
209 }
210
211 public String escapeHREF(String href) {
212 if (href == null) {
213 return null;
214 }
215
216 if (href.length() == 0) {
217 return StringPool.BLANK;
218 }
219
220 if (href.indexOf(StringPool.COLON) == 10) {
221 String protocol = href.substring(0, 10).toLowerCase();
222
223 if (protocol.equals("javascript")) {
224 return StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
225 }
226 }
227
228 return href;
229 }
230
231 public String escapeJS(String js) {
232 return escape(js, ESCAPE_MODE_JS);
233 }
234
235 public String escapeURL(String url) {
236 return escape(url, ESCAPE_MODE_URL);
237 }
238
239 public String escapeXPath(String xPath) {
240 if (Validator.isNull(xPath)) {
241 return xPath;
242 }
243
244 StringBuilder sb = new StringBuilder(xPath.length());
245
246 for (int i = 0; i < xPath.length(); i++) {
247 char c = xPath.charAt(i);
248
249 boolean hasToken = false;
250
251 for (int j = 0; j < _XPATH_TOKENS.length; j++) {
252 if (c == _XPATH_TOKENS[j]) {
253 hasToken = true;
254
255 break;
256 }
257 }
258
259 if (hasToken) {
260 sb.append(StringPool.UNDERLINE);
261 }
262 else {
263 sb.append(c);
264 }
265 }
266
267 return sb.toString();
268 }
269
270 public String escapeXPathAttribute(String xPathAttribute) {
271 boolean hasApostrophe = xPathAttribute.contains(StringPool.APOSTROPHE);
272 boolean hasQuote = xPathAttribute.contains(StringPool.QUOTE);
273
274 if (hasQuote && hasApostrophe) {
275 String[] parts = xPathAttribute.split(StringPool.APOSTROPHE);
276
277 return "concat('".concat(
278 StringUtil.merge(parts, "', \"'\", '")).concat("')");
279 }
280
281 if (hasQuote) {
282 return StringPool.APOSTROPHE.concat(xPathAttribute).concat(
283 StringPool.APOSTROPHE);
284 }
285
286 return StringPool.QUOTE.concat(xPathAttribute).concat(StringPool.QUOTE);
287 }
288
289 public String extractText(String html) {
290 if (html == null) {
291 return null;
292 }
293
294 Source source = new Source(html);
295
296 TextExtractor textExtractor = source.getTextExtractor();
297
298 return textExtractor.toString();
299 }
300
301 public String fromInputSafe(String text) {
302 return StringUtil.replace(text, "&", "&");
303 }
304
305 public String replaceMsWordCharacters(String text) {
306 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
307 }
308
309 public String stripBetween(String text, String tag) {
310 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
311 }
312
313 public String stripComments(String text) {
314 return StringUtil.stripBetween(text, "<!--", "-->");
315 }
316
317 public String stripHtml(String text) {
318 if (text == null) {
319 return null;
320 }
321
322 text = stripComments(text);
323
324 StringBuilder sb = new StringBuilder(text.length());
325
326 int x = 0;
327 int y = text.indexOf("<");
328
329 while (y != -1) {
330 sb.append(text.substring(x, y));
331 sb.append(StringPool.SPACE);
332
333
334
335 boolean scriptFound = isScriptTag(text, y + 1);
336
337 if (scriptFound) {
338 int pos = y + _TAG_SCRIPT.length;
339
340
341
342 pos = text.indexOf(">", pos);
343
344 if (pos >= 0) {
345
346
347
348
349 if (text.charAt(pos-1) != '/') {
350
351
352
353 for (;;) {
354 pos = text.indexOf("</", pos);
355
356 if (pos >= 0) {
357 if (isScriptTag(text, pos + 2)) {
358 y = pos;
359
360 break;
361 }
362 else {
363
364
365
366 pos += 2;
367 }
368 }
369 else {
370 break;
371 }
372 }
373 }
374 }
375 }
376
377 x = text.indexOf(">", y);
378
379 if (x == -1) {
380 break;
381 }
382
383 x++;
384
385 if (x < y) {
386
387
388
389 break;
390 }
391
392 y = text.indexOf("<", x);
393 }
394
395 if (y == -1) {
396 sb.append(text.substring(x));
397 }
398
399 return sb.toString();
400 }
401
402 public String toInputSafe(String text) {
403 return StringUtil.replace(
404 text,
405 new String[] {"&", "\""},
406 new String[] {"&", """});
407 }
408
409 public String unescape(String text) {
410 if (text == null) {
411 return null;
412 }
413
414 if (text.length() == 0) {
415 return StringPool.BLANK;
416 }
417
418
419
420 text = StringUtil.replace(text, "<", "<");
421 text = StringUtil.replace(text, ">", ">");
422 text = StringUtil.replace(text, "&", "&");
423 text = StringUtil.replace(text, """, "\"");
424 text = StringUtil.replace(text, "'", "'");
425 text = StringUtil.replace(text, "(", "(");
426 text = StringUtil.replace(text, ")", ")");
427 text = StringUtil.replace(text, ",", ",");
428 text = StringUtil.replace(text, "#", "#");
429 text = StringUtil.replace(text, "%", "%");
430 text = StringUtil.replace(text, ";", ";");
431 text = StringUtil.replace(text, "=", "=");
432 text = StringUtil.replace(text, "+", "+");
433 text = StringUtil.replace(text, "-", "-");
434
435 return text;
436 }
437
438 public String unescapeCDATA(String text) {
439 if (text == null) {
440 return null;
441 }
442
443 if (text.length() == 0) {
444 return StringPool.BLANK;
445 }
446
447 text = StringUtil.replace(text, "<![CDATA[", "<![CDATA[");
448 text = StringUtil.replace(text, "]]>", "]]>");
449
450 return text;
451 }
452
453 public String wordBreak(String text, int columns) {
454 StringBundler sb = new StringBundler();
455
456 int length = 0;
457 int lastWrite = 0;
458 int pos = 0;
459
460 Pattern pattern = Pattern.compile("([\\s<&]|$)");
461
462 Matcher matcher = pattern.matcher(text);
463
464 while (matcher.find()) {
465 if (matcher.start() < pos) {
466 continue;
467 }
468
469 while ((length + matcher.start() - pos) >= columns) {
470 pos += columns - length;
471
472 sb.append(text.substring(lastWrite, pos));
473 sb.append("<wbr/>­");
474
475 length = 0;
476 lastWrite = pos;
477 }
478
479 length += matcher.start() - pos;
480
481 String group = matcher.group();
482
483 if (group.equals(StringPool.AMPERSAND)) {
484 int x = text.indexOf(StringPool.SEMICOLON, matcher.start());
485
486 if (x != -1) {
487 length++;
488 pos = x + 1;
489 }
490
491 continue;
492 }
493
494 if (group.equals(StringPool.LESS_THAN)) {
495 int x = text.indexOf(StringPool.GREATER_THAN, matcher.start());
496
497 if (x != -1) {
498 pos = x + 1;
499 }
500
501 continue;
502 }
503
504 if (group.equals(StringPool.SPACE) ||
505 group.equals(StringPool.NEW_LINE)) {
506
507 length = 0;
508 pos = matcher.start() + 1;
509 }
510 }
511
512 sb.append(text.substring(lastWrite));
513
514 return sb.toString();
515 }
516
517 protected boolean isScriptTag(String text, int pos) {
518 if ((pos + _TAG_SCRIPT.length + 1) <= text.length()) {
519 char item;
520
521 for (int i = 0; i < _TAG_SCRIPT.length; i++) {
522 item = text.charAt(pos++);
523
524 if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
525 return false;
526 }
527 }
528
529 item = text.charAt(pos);
530
531
532
533 return !Character.isLetter(item);
534 }
535 else {
536 return false;
537 }
538 }
539
540 private static final String[] _MS_WORD_HTML = new String[] {
541 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
542 };
543
544 private static final String[] _MS_WORD_UNICODE = new String[] {
545 "\u00ae", "\u2019", "\u201c", "\u201d"
546 };
547
548 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
549
550
551
552 private static final char[] _XPATH_TOKENS = {
553 '(', ')', '[', ']', '.', '@', ',', ':', '/', '|', '+', '-', '=', '!',
554 '<', '>', '*', '$', '"', '"', ' ', 9, 10, 13, 133, 8232};
555
556 }