001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.util.CharPool;
018 import com.liferay.portal.kernel.util.Html;
019 import com.liferay.portal.kernel.util.HttpUtil;
020 import com.liferay.portal.kernel.util.StringPool;
021 import com.liferay.portal.kernel.util.StringUtil;
022
023 import net.htmlparser.jericho.Source;
024
025
031 public class HtmlImpl implements Html {
032
/**
 * Mode for {@link #escape(String, int)}: characters other than letters,
 * digits, dash and underscore are written as hexadecimal character
 * references ({@code &#x} + hex value + {@code ;}).
 */
033 public static final int ESCAPE_MODE_ATTRIBUTE = 1;
034
/**
 * Mode for {@link #escape(String, int)}: characters other than letters,
 * digits, dash and underscore are written as a backslash followed by their
 * hex value.
 */
035 public static final int ESCAPE_MODE_CSS = 2;
036
/**
 * Mode for {@link #escape(String, int)}: characters other than letters,
 * digits, dash and underscore are written as a backslash-x sequence followed
 * by their hex value.
 */
037 public static final int ESCAPE_MODE_JS = 3;
038
/**
 * Mode for {@link #escape(String, int)}: not matched by any dedicated branch
 * there, so the call falls through to {@link #escape(String)}.
 */
039 public static final int ESCAPE_MODE_TEXT = 4;
040
/**
 * Mode for {@link #escape(String, int)}: the text is passed to
 * <code>HttpUtil.encodeURL(text, true)</code>.
 */
041 public static final int ESCAPE_MODE_URL = 5;
042
043 public String escape(String text) {
044 if (text == null) {
045 return null;
046 }
047
048 if (text.length() == 0) {
049 return StringPool.BLANK;
050 }
051
052
053
054
055
056 StringBuilder sb = new StringBuilder(text.length());
057
058 for (int i = 0; i < text.length(); i++) {
059 char c = text.charAt(i);
060
061 switch (c) {
062 case '<':
063 sb.append("<");
064
065 break;
066
067 case '>':
068 sb.append(">");
069
070 break;
071
072 case '&':
073 sb.append("&");
074
075 break;
076
077 case '"':
078 sb.append(""");
079
080 break;
081
082 case '\'':
083 sb.append("'");
084
085 break;
086
087 default:
088 sb.append(c);
089
090 break;
091 }
092 }
093
094 return sb.toString();
095 }
096
097 public String escape(String text, int type) {
098 if (text == null) {
099 return null;
100 }
101
102 if (text.length() == 0) {
103 return StringPool.BLANK;
104 }
105
106 String prefix = StringPool.BLANK;
107 String postfix = StringPool.BLANK;
108
109 if (type == ESCAPE_MODE_ATTRIBUTE) {
110 prefix = "&#x";
111 postfix = StringPool.SEMICOLON;
112 }
113 else if (type == ESCAPE_MODE_CSS) {
114 prefix = StringPool.BACK_SLASH;
115 }
116 else if (type == ESCAPE_MODE_JS) {
117 prefix = "\\x";
118 }
119 else if (type == ESCAPE_MODE_URL) {
120 return HttpUtil.encodeURL(text, true);
121 }
122 else {
123 return escape(text);
124 }
125
126 StringBuilder sb = new StringBuilder();
127
128 for (int i = 0; i < text.length(); i++) {
129 char c = text.charAt(i);
130
131 if ((Character.isLetterOrDigit(c)) ||
132 (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
133
134 sb.append(c);
135 }
136 else {
137 sb.append(prefix);
138 sb.append(Integer.toHexString(c));
139 sb.append(postfix);
140 }
141 }
142
143 return sb.toString();
144 }
145
146 public String escapeAttribute(String attribute) {
147 return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
148 }
149
150 public String escapeCSS(String css) {
151 return escape(css, ESCAPE_MODE_CSS);
152 }
153
154 public String escapeHREF(String href) {
155 if (href == null) {
156 return null;
157 }
158
159 if (href.length() == 0) {
160 return StringPool.BLANK;
161 }
162
163 if (href.indexOf(StringPool.COLON) == 10) {
164 String protocol = href.substring(0, 10).toLowerCase();
165
166 if (protocol.equals("javascript")) {
167 return StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
168 }
169 }
170
171 return href;
172 }
173
174 public String escapeJS(String js) {
175 return escape(js, ESCAPE_MODE_JS);
176 }
177
178 public String escapeURL(String url) {
179 return escape(url, ESCAPE_MODE_URL);
180 }
181
182 public String extractText(String html) {
183 if (html == null) {
184 return null;
185 }
186
187 Source source = new Source(html);
188
189 return source.getTextExtractor().toString();
190 }
191
192 public String fromInputSafe(String text) {
193 return StringUtil.replace(text, "&", "&");
194 }
195
196 public String replaceMsWordCharacters(String text) {
197 return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
198 }
199
200 public String stripBetween(String text, String tag) {
201 return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
202 }
203
204 public String stripComments(String text) {
205 return StringUtil.stripBetween(text, "<!--", "-->");
206 }
207
208 public String stripHtml(String text) {
209 if (text == null) {
210 return null;
211 }
212
213 text = stripComments(text);
214
215 StringBuilder sb = new StringBuilder(text.length());
216
217 int x = 0;
218 int y = text.indexOf("<");
219
220 while (y != -1) {
221 sb.append(text.substring(x, y));
222 sb.append(StringPool.SPACE);
223
224
225
226 boolean scriptFound = isScriptTag(text, y + 1);
227
228 if (scriptFound) {
229 int pos = y + _TAG_SCRIPT.length;
230
231
232
233 pos = text.indexOf(">", pos);
234
235 if (pos >= 0) {
236
237
238
239
240 if (text.charAt(pos-1) != '/') {
241
242
243
244 for (;;) {
245 pos = text.indexOf("</", pos);
246
247 if (pos >= 0) {
248 if (isScriptTag(text, pos + 2)) {
249 y = pos;
250
251 break;
252 }
253 else {
254
255
256
257 pos += 2;
258 }
259 }
260 else {
261 break;
262 }
263 }
264 }
265 }
266 }
267
268 x = text.indexOf(">", y);
269
270 if (x == -1) {
271 break;
272 }
273
274 x++;
275
276 if (x < y) {
277
278
279
280 break;
281 }
282
283 y = text.indexOf("<", x);
284 }
285
286 if (y == -1) {
287 sb.append(text.substring(x, text.length()));
288 }
289
290 return sb.toString();
291 }
292
293 public String toInputSafe(String text) {
294 return StringUtil.replace(
295 text,
296 new String[] {"&", "\""},
297 new String[] {"&", """});
298 }
299
300 public String unescape(String text) {
301 if (text == null) {
302 return null;
303 }
304
305 if (text.length() == 0) {
306 return StringPool.BLANK;
307 }
308
309
310
311 text = StringUtil.replace(text, "<", "<");
312 text = StringUtil.replace(text, ">", ">");
313 text = StringUtil.replace(text, "&", "&");
314 text = StringUtil.replace(text, """, "\"");
315 text = StringUtil.replace(text, "'", "'");
316 text = StringUtil.replace(text, "(", "(");
317 text = StringUtil.replace(text, ")", ")");
318 text = StringUtil.replace(text, ",", ",");
319 text = StringUtil.replace(text, "#", "#");
320 text = StringUtil.replace(text, "%", "%");
321 text = StringUtil.replace(text, ";", ";");
322 text = StringUtil.replace(text, "=", "=");
323 text = StringUtil.replace(text, "+", "+");
324 text = StringUtil.replace(text, "-", "-");
325
326 return text;
327 }
328
329 protected boolean isScriptTag(String text, int pos) {
330 if (pos + _TAG_SCRIPT.length + 1 <= text.length()) {
331 char item;
332
333 for (int i = 0; i < _TAG_SCRIPT.length; i++) {
334 item = text.charAt(pos++);
335
336 if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
337 return false;
338 }
339 }
340
341 item = text.charAt(pos);
342
343
344
345 return !Character.isLetter(item);
346 }
347 else {
348 return false;
349 }
350 }
351
352 private static final String[] _MS_WORD_UNICODE = new String[] {
353 "\u00ae", "\u2019", "\u201c", "\u201d"
354 };
355
356 private static final String[] _MS_WORD_HTML = new String[] {
357 "®", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
358 };
359
360 private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
361
362 }