001    /**
002     * Copyright (c) 2000-present Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.kernel.util;
016    
017    import com.liferay.portal.kernel.log.Log;
018    import com.liferay.portal.kernel.log.LogFactoryUtil;
019    import com.liferay.portal.kernel.nio.charset.CharsetDecoderUtil;
020    import com.liferay.portal.kernel.nio.charset.CharsetEncoderUtil;
021    
022    import java.nio.ByteBuffer;
023    import java.nio.CharBuffer;
024    import java.nio.charset.CharacterCodingException;
025    import java.nio.charset.CharsetDecoder;
026    import java.nio.charset.CharsetEncoder;
027    
028    import java.util.BitSet;
029    
030    /**
031     * @author Shuyang Zhou
032     * @author Brian Wing Shun Chan
033     */
034    public class URLCodec {
035    
036            public static String decodeURL(String encodedURLString) {
037                    return decodeURL(encodedURLString, StringPool.UTF8);
038            }
039    
040            public static String decodeURL(
041                    String encodedURLString, String charsetName) {
042    
043                    if (encodedURLString == null) {
044                            return null;
045                    }
046    
047                    if (encodedURLString.length() == 0) {
048                            return StringPool.BLANK;
049                    }
050    
051                    StringBuilder sb = null;
052    
053                    CharsetDecoder charsetDecoder = null;
054    
055                    for (int i = 0; i < encodedURLString.length(); i++) {
056                            char c = encodedURLString.charAt(i);
057    
058                            switch (c) {
059                                    case CharPool.PERCENT:
060                                            ByteBuffer byteBuffer = _getEncodedByteBuffer(
061                                                    encodedURLString, i);
062    
063                                            if (charsetDecoder == null) {
064                                                    charsetDecoder = CharsetDecoderUtil.getCharsetDecoder(
065                                                            charsetName);
066                                            }
067    
068                                            CharBuffer charBuffer = null;
069    
070                                            try {
071                                                    charBuffer = charsetDecoder.decode(byteBuffer);
072                                            }
073                                            catch (CharacterCodingException cce) {
074                                                    _log.error(cce, cce);
075    
076                                                    return StringPool.BLANK;
077                                            }
078    
079                                            if (sb == null) {
080                                                    sb = new StringBuilder(encodedURLString.length());
081    
082                                                    if (i > 0) {
083                                                            sb.append(encodedURLString, 0, i);
084                                                    }
085                                            }
086    
087                                            sb.append(charBuffer);
088    
089                                            i += byteBuffer.capacity() * 3 - 1;
090    
091                                            break;
092    
093                                    case CharPool.PLUS:
094                                            if (sb == null) {
095                                                    sb = new StringBuilder(encodedURLString.length());
096    
097                                                    if (i > 0) {
098                                                            sb.append(encodedURLString, 0, i);
099                                                    }
100                                            }
101    
102                                            sb.append(CharPool.SPACE);
103    
104                                            break;
105    
106                                    default:
107                                            if (sb != null) {
108                                                    sb.append(c);
109                                            }
110                            }
111                    }
112    
113                    if (sb == null) {
114                            return encodedURLString;
115                    }
116                    else {
117                            return sb.toString();
118                    }
119            }
120    
121            public static String encodeURL(String rawURLString) {
122                    return encodeURL(rawURLString, StringPool.UTF8, false);
123            }
124    
125            public static String encodeURL(String rawURLString, boolean escapeSpaces) {
126                    return encodeURL(rawURLString, StringPool.UTF8, escapeSpaces);
127            }
128    
129            public static String encodeURL(
130                    String rawURLString, String charsetName, boolean escapeSpaces) {
131    
132                    if (rawURLString == null) {
133                            return null;
134                    }
135    
136                    if (rawURLString.isEmpty()) {
137                            return StringPool.BLANK;
138                    }
139    
140                    StringBuilder sb = null;
141    
142                    CharsetEncoder charsetEncoder = null;
143    
144                    char[] hexes = new char[2];
145    
146                    for (int i = 0; i < rawURLString.length(); i++) {
147                            char c = rawURLString.charAt(i);
148    
149                            if (_validChars.get(c)) {
150                                    if (sb != null) {
151                                            sb.append(c);
152                                    }
153    
154                                    continue;
155                            }
156    
157                            if (sb == null) {
158                                    sb = new StringBuilder(rawURLString.length());
159    
160                                    sb.append(rawURLString, 0, i);
161                            }
162    
163                            // The cases are ordered by frequency and not alphabetically
164    
165                            switch (c) {
166                                    case CharPool.SLASH :
167                                            sb.append("%2F");
168    
169                                            continue;
170    
171                                    case CharPool.EQUAL :
172                                            sb.append("%3D");
173    
174                                            continue;
175    
176                                    case CharPool.AMPERSAND :
177                                            sb.append("%26");
178    
179                                            continue;
180    
181                                    case CharPool.PERCENT :
182                                            sb.append("%25");
183    
184                                            continue;
185    
186                                    case CharPool.SPACE :
187                                            if (escapeSpaces) {
188                                                    sb.append("%20");
189                                            }
190                                            else {
191                                                    sb.append(CharPool.PLUS);
192                                            }
193    
194                                            continue;
195    
196                                    case CharPool.PLUS :
197                                            sb.append("%2B");
198    
199                                            continue;
200    
201                                    case CharPool.COLON :
202                                            sb.append("%3A");
203    
204                                            continue;
205    
206                                    case CharPool.QUESTION :
207                                            sb.append("%3F");
208    
209                                            continue;
210                            }
211    
212                            CharBuffer charBuffer = _getRawCharBuffer(
213                                    rawURLString, i, escapeSpaces);
214    
215                            if (charsetEncoder == null) {
216                                    charsetEncoder = CharsetEncoderUtil.getCharsetEncoder(
217                                            charsetName);
218                            }
219    
220                            i += charBuffer.length() - 1;
221    
222                            ByteBuffer byteBuffer = null;
223    
224                            try {
225                                    byteBuffer = charsetEncoder.encode(charBuffer);
226                            }
227                            catch (CharacterCodingException cce) {
228                                    _log.error(cce, cce);
229    
230                                    return StringPool.BLANK;
231                            }
232    
233                            for (int j = byteBuffer.position(); j < byteBuffer.limit(); j++) {
234                                    sb.append(CharPool.PERCENT);
235    
236                                    sb.append(
237                                            UnicodeFormatter.byteToHex(byteBuffer.get(), hexes, true));
238                            }
239                    }
240    
241                    if (sb == null) {
242                            return rawURLString;
243                    }
244                    else {
245                            return sb.toString();
246                    }
247            }
248    
249            private static int _charToHex(char c) {
250                    if ((c >= CharPool.LOWER_CASE_A) && (c <= CharPool.LOWER_CASE_F)) {
251                            return c - CharPool.LOWER_CASE_A + 10;
252                    }
253    
254                    if ((c >= CharPool.UPPER_CASE_A) && (c <= CharPool.UPPER_CASE_F)) {
255                            return c - CharPool.UPPER_CASE_A + 10;
256                    }
257    
258                    if ((c >= CharPool.NUMBER_0) && (c <= CharPool.NUMBER_9)) {
259                            return c - CharPool.NUMBER_0;
260                    }
261    
262                    throw new IllegalArgumentException(c + " is not a hex char");
263            }
264    
265            private static ByteBuffer _getEncodedByteBuffer(
266                    String encodedString, int start) {
267    
268                    int count = 1;
269    
270                    for (int i = start + 3; i < encodedString.length(); i += 3) {
271                            if (encodedString.charAt(i) == CharPool.PERCENT) {
272                                    count++;
273                            }
274                            else {
275                                    break;
276                            }
277                    }
278    
279                    if (encodedString.length() < (start + count * 3)) {
280                            throw new IllegalArgumentException(
281                                    "Invalid URL encoding " + encodedString);
282                    }
283    
284                    ByteBuffer byteBuffer = ByteBuffer.allocate(count);
285    
286                    for (int i = start; i < start + count * 3; i += 3) {
287                            int high = _charToHex(encodedString.charAt(i + 1));
288                            int low = _charToHex(encodedString.charAt(i + 2));
289    
290                            byteBuffer.put((byte)((high << 4) + low));
291                    }
292    
293                    byteBuffer.flip();
294    
295                    return byteBuffer;
296            }
297    
298            private static CharBuffer _getRawCharBuffer(
299                    String rawString, int start, boolean escapeSpaces) {
300    
301                    int count = 0;
302    
303                    for (int i = start; i < rawString.length(); i++) {
304                            char rawChar = rawString.charAt(i);
305    
306                            if (!_validChars.get(rawChar) &&
307                                    (escapeSpaces || (rawChar != CharPool.SPACE))) {
308    
309                                    count++;
310    
311                                    if (Character.isHighSurrogate(rawChar)) {
312                                            if (((i + 1) < rawString.length()) &&
313                                                    Character.isLowSurrogate(rawString.charAt(i + 1))) {
314    
315                                                    i++;
316                                                    count++;
317                                            }
318                                    }
319                            }
320                            else {
321                                    break;
322                            }
323                    }
324    
325                    return CharBuffer.wrap(rawString, start, start + count);
326            }
327    
328            private static final Log _log = LogFactoryUtil.getLog(URLCodec.class);
329    
330            private static final BitSet _validChars = new BitSet(256);
331    
332            static {
333                    for (int i = 'a'; i <= 'z'; i++) {
334                            _validChars.set(i);
335                    }
336    
337                    for (int i = 'A'; i <= 'Z'; i++) {
338                            _validChars.set(i);
339                    }
340    
341                    for (int i = '0'; i <= '9'; i++) {
342                            _validChars.set(i);
343                    }
344    
345                    _validChars.set('-');
346                    _validChars.set('_');
347                    _validChars.set('.');
348                    _validChars.set('*');
349            }
350    
351    }