001    /**
002     * Copyright (c) 2000-2011 Liferay, Inc. All rights reserved.
003     *
004     * The contents of this file are subject to the terms of the Liferay Enterprise
005     * Subscription License ("License"). You may not use this file except in
006     * compliance with the License. You can obtain a copy of the License by
007     * contacting Liferay, Inc. See the License for the specific language governing
008     * permissions and limitations under the License, including but not limited to
009     * distribution rights of the Software.
010     *
011     *
012     *
013     */
014    
015    package com.liferay.portal.kernel.util;
016    
017    import com.liferay.portal.kernel.log.Log;
018    import com.liferay.portal.kernel.log.LogFactoryUtil;
019    import com.liferay.portal.kernel.nio.charset.CharsetDecoderUtil;
020    import com.liferay.portal.kernel.nio.charset.CharsetEncoderUtil;
021    
022    import java.nio.ByteBuffer;
023    import java.nio.CharBuffer;
024    import java.nio.charset.CharacterCodingException;
025    import java.nio.charset.CharsetDecoder;
026    import java.nio.charset.CharsetEncoder;
027    
028    import java.util.BitSet;
029    
030    /**
031     * @author Shuyang Zhou
032     * @author Brian Wing Shun Chan
033     */
034    public class URLCodec {
035    
036            public static String decodeURL(String encodedURLString) {
037                    return decodeURL(encodedURLString, StringPool.UTF8, false);
038            }
039    
040            public static String decodeURL(
041                    String encodedURLString, boolean unescapeSpaces) {
042    
043                    return decodeURL(encodedURLString, StringPool.UTF8, unescapeSpaces);
044            }
045    
046            public static String decodeURL(
047                    String encodedURLString, String charsetName, boolean unescapeSpaces) {
048    
049                    if (encodedURLString == null) {
050                            return null;
051                    }
052    
053                    if (encodedURLString.length() == 0) {
054                            return StringPool.BLANK;
055                    }
056    
057                    /*if (unescapeSpaces) {
058                            encodedURLString = StringUtil.replace(
059                                    encodedURLString, "%20", StringPool.PLUS);
060                    }*/
061    
062                    StringBuilder sb = new StringBuilder(encodedURLString.length());
063    
064                    CharsetDecoder charsetDecoder = null;
065    
066                    boolean modified = false;
067    
068                    for (int i = 0; i < encodedURLString.length(); i++) {
069                            char c = encodedURLString.charAt(i);
070    
071                            if (c == CharPool.PERCENT) {
072                                    ByteBuffer byteBuffer = _getEncodedByteBuffer(
073                                            encodedURLString, i);
074    
075                                    if (charsetDecoder == null) {
076                                            charsetDecoder = CharsetDecoderUtil.getCharsetDecoder(
077                                                    charsetName);
078                                    }
079    
080                                    CharBuffer charBuffer = null;
081    
082                                    try {
083                                            charBuffer = charsetDecoder.decode(byteBuffer);
084                                    }
085                                    catch (CharacterCodingException cce) {
086                                            _log.error(cce, cce);
087    
088                                            return StringPool.BLANK;
089                                    }
090    
091                                    sb.append(charBuffer);
092    
093                                    i += byteBuffer.capacity() * 3 - 1;
094                            }
095                            else if (c == CharPool.PLUS) {
096                                    sb.append(CharPool.SPACE);
097    
098                                    modified = true;
099                            }
100                            else {
101                                    sb.append(c);
102                            }
103                    }
104    
105                    if (!modified && (sb.length() == encodedURLString.length())) {
106                            return encodedURLString;
107                    }
108                    else {
109                            return sb.toString();
110                    }
111            }
112    
113            public static String encodeURL(String rawURLString) {
114                    return encodeURL(rawURLString, StringPool.UTF8, false);
115            }
116    
117            public static String encodeURL(String rawURLString, boolean escapeSpaces) {
118                    return encodeURL(rawURLString, StringPool.UTF8, escapeSpaces);
119            }
120    
121            public static String encodeURL(
122                    String rawURLString, String charsetName, boolean escapeSpaces) {
123    
124                    if (rawURLString == null) {
125                            return null;
126                    }
127    
128                    if (rawURLString.length() == 0) {
129                            return StringPool.BLANK;
130                    }
131    
132                    StringBuilder sb = new StringBuilder(rawURLString.length());
133    
134                    CharsetEncoder charsetEncoder = null;
135    
136                    char[] hexes = new char[2];
137    
138                    boolean modified = false;
139    
140                    for (int i = 0; i < rawURLString.length(); i++) {
141                            char c = rawURLString.charAt(i);
142    
143                            if (_validChars.get(c)) {
144                                    sb.append(c);
145                            }
146                            else if (c == CharPool.SPACE) {
147                                    if (escapeSpaces) {
148                                            sb.append("%20");
149                                    }
150                                    else {
151                                            sb.append(CharPool.PLUS);
152                                    }
153    
154                                    modified = true;
155                            }
156                            else {
157                                    CharBuffer charBuffer = _getRawCharBuffer(rawURLString, i);
158    
159                                    if (charsetEncoder == null) {
160                                            charsetEncoder = CharsetEncoderUtil.getCharsetEncoder(
161                                                    charsetName);
162                                    }
163    
164                                    i += charBuffer.length() - 1;
165    
166                                    ByteBuffer byteBuffer = null;
167    
168                                    try {
169                                            byteBuffer = charsetEncoder.encode(charBuffer);
170                                    }
171                                    catch (CharacterCodingException cce) {
172                                            _log.error(cce, cce);
173    
174                                            return StringPool.BLANK;
175                                    }
176    
177                                    for (int j = byteBuffer.position(); j < byteBuffer.limit();
178                                                    j++) {
179    
180                                            sb.append(CharPool.PERCENT);
181    
182                                            String hex = new String(
183                                                    UnicodeFormatter.byteToHex(byteBuffer.get(), hexes));
184    
185                                            hex = hex.toUpperCase();
186    
187                                            sb.append(hex);
188                                    }
189                            }
190                    }
191    
192                    if (!modified && (sb.length() == rawURLString.length())) {
193                            return rawURLString;
194                    }
195                    else {
196                            return sb.toString();
197                    }
198            }
199    
200            private static int _charToHex(char c) {
201                    if ((c >= CharPool.LOWER_CASE_A) && (c <= CharPool.LOWER_CASE_Z)) {
202                            return c - CharPool.LOWER_CASE_A + 10;
203                    }
204    
205                    if ((c >= CharPool.UPPER_CASE_A) && (c <= CharPool.UPPER_CASE_Z)) {
206                            return c - CharPool.UPPER_CASE_A + 10;
207                    }
208    
209                    if ((c >= CharPool.NUMBER_0) && (c <= CharPool.NUMBER_9)) {
210                            return c - CharPool.NUMBER_0;
211                    }
212    
213                    throw new IllegalArgumentException(c + " is not a hex char");
214            }
215    
216            private static ByteBuffer _getEncodedByteBuffer(
217                    String encodedString, int start) {
218    
219                    int count = 1;
220    
221                    for (int i = start + 3; i < encodedString.length(); i += 3) {
222                            if (encodedString.charAt(i) == CharPool.PERCENT) {
223                                    count++;
224                            }
225                            else {
226                                    break;
227                            }
228                    }
229    
230                    ByteBuffer byteBuffer = ByteBuffer.allocate(count);
231    
232                    for (int i = start; i < start + count * 3; i += 3) {
233                            int high = _charToHex(encodedString.charAt(i + 1));
234                            int low = _charToHex(encodedString.charAt(i + 2));
235    
236                            byteBuffer.put((byte)((high << 4) + low));
237                    }
238    
239                    byteBuffer.flip();
240    
241                    return byteBuffer;
242            }
243    
244            private static CharBuffer _getRawCharBuffer(String rawString, int start) {
245                    int count = 0;
246    
247                    for (int i = start; i < rawString.length(); i++) {
248                            char rawChar = rawString.charAt(i);
249    
250                            if (!_validChars.get(rawChar)) {
251                                    count++;
252    
253                                    if (Character.isHighSurrogate(rawChar)) {
254                                            if (((i + 1) < rawString.length()) &&
255                                                    Character.isLowSurrogate(rawString.charAt(i + 1))) {
256    
257                                                    count++;
258                                            }
259                                    }
260                            }
261                            else {
262                                    break;
263                            }
264                    }
265    
266                    return CharBuffer.wrap(rawString, start, start + count);
267            }
268    
269            private static Log _log = LogFactoryUtil.getLog(URLCodec.class);
270    
271            private static BitSet _validChars = new BitSet(256);
272    
273            static {
274                    for (int i = 'a'; i <= 'z'; i++) {
275                            _validChars.set(i);
276                    }
277    
278                    for (int i = 'A'; i <= 'Z'; i++) {
279                            _validChars.set(i);
280                    }
281    
282                    for (int i = '0'; i <= '9'; i++) {
283                            _validChars.set(i);
284                    }
285    
286                    _validChars.set('-');
287                    _validChars.set('_');
288                    _validChars.set('.');
289                    _validChars.set('*');
290            }
291    
292    }