001    /**
002     * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.util;
016    
017    import com.liferay.portal.kernel.util.CharPool;
018    import com.liferay.portal.kernel.util.Html;
019    import com.liferay.portal.kernel.util.HttpUtil;
020    import com.liferay.portal.kernel.util.StringPool;
021    import com.liferay.portal.kernel.util.StringUtil;
022    
023    import net.htmlparser.jericho.Source;
024    
025    /**
026     * @author Brian Wing Shun Chan
027     * @author Clarence Shen
028     * @author Harry Mark
029     * @author Samuel Kong
030     */
031    public class HtmlImpl implements Html {
032    
033            public static final int ESCAPE_MODE_ATTRIBUTE = 1;
034    
035            public static final int ESCAPE_MODE_CSS = 2;
036    
037            public static final int ESCAPE_MODE_JS = 3;
038    
039            public static final int ESCAPE_MODE_TEXT = 4;
040    
041            public static final int ESCAPE_MODE_URL = 5;
042    
043            public String escape(String text) {
044                    if (text == null) {
045                            return null;
046                    }
047    
048                    if (text.length() == 0) {
049                            return StringPool.BLANK;
050                    }
051    
052                    // Escape using XSS recommendations from
053                    // http://www.owasp.org/index.php/Cross_Site_Scripting
054                    // #How_to_Protect_Yourself
055    
056                    StringBuilder sb = new StringBuilder(text.length());
057    
058                    for (int i = 0; i < text.length(); i++) {
059                            char c = text.charAt(i);
060    
061                            switch (c) {
062                                    case '<':
063                                            sb.append("&lt;");
064    
065                                            break;
066    
067                                    case '>':
068                                            sb.append("&gt;");
069    
070                                            break;
071    
072                                    case '&':
073                                            sb.append("&amp;");
074    
075                                            break;
076    
077                                    case '"':
078                                            sb.append("&#034;");
079    
080                                            break;
081    
082                                    case '\'':
083                                            sb.append("&#039;");
084    
085                                            break;
086    
087                                    default:
088                                            sb.append(c);
089    
090                                            break;
091                            }
092                    }
093    
094                    return sb.toString();
095            }
096    
097            public String escape(String text, int type) {
098                    if (text == null) {
099                            return null;
100                    }
101    
102                    if (text.length() == 0) {
103                            return StringPool.BLANK;
104                    }
105    
106                    String prefix = StringPool.BLANK;
107                    String postfix = StringPool.BLANK;
108    
109                    if (type == ESCAPE_MODE_ATTRIBUTE) {
110                            prefix = "&#x";
111                            postfix = StringPool.SEMICOLON;
112                    }
113                    else if (type == ESCAPE_MODE_CSS) {
114                            prefix = StringPool.BACK_SLASH;
115                    }
116                    else if (type == ESCAPE_MODE_JS) {
117                            prefix = "\\x";
118                    }
119                    else if (type == ESCAPE_MODE_URL) {
120                            return HttpUtil.encodeURL(text, true);
121                    }
122                    else {
123                            return escape(text);
124                    }
125    
126                    StringBuilder sb = new StringBuilder();
127    
128                    for (int i = 0; i < text.length(); i++) {
129                            char c = text.charAt(i);
130    
131                            if ((Character.isLetterOrDigit(c)) ||
132                                    (c == CharPool.DASH) || (c == CharPool.UNDERLINE)) {
133    
134                                    sb.append(c);
135                            }
136                            else {
137                                    sb.append(prefix);
138                                    sb.append(Integer.toHexString(c));
139                                    sb.append(postfix);
140                            }
141                    }
142    
143                    return sb.toString();
144            }
145    
146            public String escapeAttribute(String attribute) {
147                    return escape(attribute, ESCAPE_MODE_ATTRIBUTE);
148            }
149    
150            public String escapeCSS(String css) {
151                    return escape(css, ESCAPE_MODE_CSS);
152            }
153    
154            public String escapeHREF(String href) {
155                    if (href == null) {
156                            return null;
157                    }
158    
159                    if (href.length() == 0) {
160                            return StringPool.BLANK;
161                    }
162    
163                    if (href.indexOf(StringPool.COLON) == 10) {
164                            String protocol = href.substring(0, 10).toLowerCase();
165    
166                            if (protocol.equals("javascript")) {
167                                    return StringUtil.replaceFirst(href, StringPool.COLON, "%3a");
168                            }
169                    }
170    
171                    return href;
172            }
173    
174            public String escapeJS(String js) {
175                    return escape(js, ESCAPE_MODE_JS);
176            }
177    
178            public String escapeURL(String url) {
179                    return escape(url, ESCAPE_MODE_URL);
180            }
181    
182            public String extractText(String html) {
183                    if (html == null) {
184                            return null;
185                    }
186    
187                    Source source = new Source(html);
188    
189                    return source.getTextExtractor().toString();
190            }
191    
192            public String fromInputSafe(String text) {
193                    return StringUtil.replace(text, "&amp;", "&");
194            }
195    
196            public String replaceMsWordCharacters(String text) {
197                    return StringUtil.replace(text, _MS_WORD_UNICODE, _MS_WORD_HTML);
198            }
199    
200            public String stripBetween(String text, String tag) {
201                    return StringUtil.stripBetween(text, "<" + tag, "</" + tag + ">");
202            }
203    
204            public String stripComments(String text) {
205                    return StringUtil.stripBetween(text, "<!--", "-->");
206            }
207    
208            public String stripHtml(String text) {
209                    if (text == null) {
210                            return null;
211                    }
212    
213                    text = stripComments(text);
214    
215                    StringBuilder sb = new StringBuilder(text.length());
216    
217                    int x = 0;
218                    int y = text.indexOf("<");
219    
220                    while (y != -1) {
221                            sb.append(text.substring(x, y));
222                            sb.append(StringPool.SPACE);
223    
224                            // Look for text enclosed by <script></script>
225    
226                            boolean scriptFound = isScriptTag(text, y + 1);
227    
228                            if (scriptFound) {
229                                    int pos = y + _TAG_SCRIPT.length;
230    
231                                    // Find end of the tag
232    
233                                    pos = text.indexOf(">", pos);
234    
235                                    if (pos >= 0) {
236    
237                                            // Check if preceding character is / (i.e. is this instance
238                                            // of <script/>)
239    
240                                            if (text.charAt(pos-1) != '/') {
241    
242                                                    // Search for the ending </script> tag
243    
244                                                    for (;;) {
245                                                            pos = text.indexOf("</", pos);
246    
247                                                            if (pos >= 0) {
248                                                                    if (isScriptTag(text, pos + 2)) {
249                                                                            y = pos;
250    
251                                                                            break;
252                                                                    }
253                                                                    else {
254    
255                                                                            // Skip past "</"
256    
257                                                                            pos += 2;
258                                                                    }
259                                                            }
260                                                            else {
261                                                                    break;
262                                                            }
263                                                    }
264                                            }
265                                    }
266                            }
267    
268                            x = text.indexOf(">", y);
269    
270                            if (x == -1) {
271                                    break;
272                            }
273    
274                            x++;
275    
276                            if (x < y) {
277    
278                                    // <b>Hello</b
279    
280                                    break;
281                            }
282    
283                            y = text.indexOf("<", x);
284                    }
285    
286                    if (y == -1) {
287                            sb.append(text.substring(x, text.length()));
288                    }
289    
290                    return sb.toString();
291            }
292    
293            public String toInputSafe(String text) {
294                    return StringUtil.replace(
295                            text,
296                            new String[] {"&", "\""},
297                            new String[] {"&amp;", "&quot;"});
298            }
299    
300            public String unescape(String text) {
301                    if (text == null) {
302                            return null;
303                    }
304    
305                    if (text.length() == 0) {
306                            return StringPool.BLANK;
307                    }
308    
309                    // Optimize this
310    
311                    text = StringUtil.replace(text, "&lt;", "<");
312                    text = StringUtil.replace(text, "&gt;", ">");
313                    text = StringUtil.replace(text, "&amp;", "&");
314                    text = StringUtil.replace(text, "&#034;", "\"");
315                    text = StringUtil.replace(text, "&#039;", "'");
316                    text = StringUtil.replace(text, "&#040;", "(");
317                    text = StringUtil.replace(text, "&#041;", ")");
318                    text = StringUtil.replace(text, "&#044;", ",");
319                    text = StringUtil.replace(text, "&#035;", "#");
320                    text = StringUtil.replace(text, "&#037;", "%");
321                    text = StringUtil.replace(text, "&#059;", ";");
322                    text = StringUtil.replace(text, "&#061;", "=");
323                    text = StringUtil.replace(text, "&#043;", "+");
324                    text = StringUtil.replace(text, "&#045;", "-");
325    
326                    return text;
327            }
328    
329            protected boolean isScriptTag(String text, int pos) {
330                    if (pos + _TAG_SCRIPT.length + 1 <= text.length()) {
331                            char item;
332    
333                            for (int i = 0; i < _TAG_SCRIPT.length; i++) {
334                                    item = text.charAt(pos++);
335    
336                                    if (Character.toLowerCase(item) != _TAG_SCRIPT[i]) {
337                                            return false;
338                                    }
339                            }
340    
341                            item = text.charAt(pos);
342    
343                            // Check that char after "script" is not a letter (i.e. another tag)
344    
345                            return !Character.isLetter(item);
346                    }
347                    else {
348                            return false;
349                    }
350            }
351    
352            private static final String[] _MS_WORD_UNICODE = new String[] {
353                    "\u00ae", "\u2019", "\u201c", "\u201d"
354            };
355    
356            private static final String[] _MS_WORD_HTML = new String[] {
357                    "&reg;", StringPool.APOSTROPHE, StringPool.QUOTE, StringPool.QUOTE
358            };
359    
360            private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'};
361    
362    }