001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.util;
016    
017    import com.liferay.portal.kernel.exception.SystemException;
018    import com.liferay.portal.kernel.log.Log;
019    import com.liferay.portal.kernel.log.LogFactoryUtil;
020    import com.liferay.portal.kernel.util.ContentTypes;
021    import com.liferay.portal.kernel.util.GetterUtil;
022    import com.liferay.portal.kernel.util.MimeTypes;
023    import com.liferay.portal.kernel.util.SetUtil;
024    import com.liferay.portal.kernel.util.StreamUtil;
025    import com.liferay.portal.kernel.util.Validator;
026    
027    import java.io.File;
028    import java.io.FileNotFoundException;
029    import java.io.InputStream;
030    
031    import java.net.URL;
032    
033    import java.util.Collections;
034    import java.util.HashMap;
035    import java.util.HashSet;
036    import java.util.Map;
037    import java.util.Set;
038    
039    import javax.xml.parsers.DocumentBuilder;
040    import javax.xml.parsers.DocumentBuilderFactory;
041    
042    import org.apache.tika.detect.DefaultDetector;
043    import org.apache.tika.detect.Detector;
044    import org.apache.tika.io.CloseShieldInputStream;
045    import org.apache.tika.io.TikaInputStream;
046    import org.apache.tika.metadata.Metadata;
047    import org.apache.tika.mime.MediaType;
048    import org.apache.tika.mime.MimeTypesReaderMetKeys;
049    
050    import org.w3c.dom.Document;
051    import org.w3c.dom.Element;
052    import org.w3c.dom.Node;
053    import org.w3c.dom.NodeList;
054    
055    import org.xml.sax.InputSource;
056    
057    /**
058     * @author Jorge Ferrer
059     * @author Brian Wing Shun Chan
060     * @author Alexander Chow
061     */
062    public class MimeTypesImpl implements MimeTypes, MimeTypesReaderMetKeys {
063    
064            public MimeTypesImpl() {
065                    _detector = new DefaultDetector(
066                            org.apache.tika.mime.MimeTypes.getDefaultMimeTypes());
067    
068                    _webImageMimeTypes = SetUtil.fromArray(
069                            PropsValues.MIME_TYPES_WEB_IMAGES);
070    
071                    URL url = org.apache.tika.mime.MimeTypes.class.getResource(
072                            "tika-mimetypes.xml");
073    
074                    try {
075                            read(url.openStream());
076                    }
077                    catch (Exception e) {
078                            _log.error("Unable to populate extensions map", e);
079                    }
080            }
081    
082            @Override
083            public String getContentType(File file) {
084                    return getContentType(file, file.getName());
085            }
086    
087            @Override
088            public String getContentType(File file, String fileName) {
089                    if ((file == null) || !file.exists()) {
090                            return getContentType(fileName);
091                    }
092    
093                    InputStream is = null;
094    
095                    try {
096                            is = TikaInputStream.get(file);
097    
098                            return getContentType(is, fileName);
099                    }
100                    catch (FileNotFoundException fnfe) {
101                            return getContentType(fileName);
102                    }
103                    finally {
104                            StreamUtil.cleanUp(is);
105                    }
106            }
107    
108            @Override
109            public String getContentType(InputStream inputStream, String fileName) {
110                    if (inputStream == null) {
111                            return getContentType(fileName);
112                    }
113    
114                    String contentType = null;
115    
116                    TikaInputStream tikaInputStream = null;
117    
118                    try {
119                            tikaInputStream = TikaInputStream.get(
120                                    new CloseShieldInputStream(inputStream));
121    
122                            Metadata metadata = new Metadata();
123    
124                            metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
125    
126                            MediaType mediaType = _detector.detect(tikaInputStream, metadata);
127    
128                            contentType = mediaType.toString();
129    
130                            if (contentType.contains("tika")) {
131                                    if (_log.isDebugEnabled()) {
132                                            _log.debug("Retrieved invalid content type " + contentType);
133                                    }
134    
135                                    contentType = getContentType(fileName);
136                            }
137    
138                            if (contentType.contains("tika")) {
139                                    if (_log.isDebugEnabled()) {
140                                            _log.debug("Retrieved invalid content type " + contentType);
141                                    }
142    
143                                    contentType = ContentTypes.APPLICATION_OCTET_STREAM;
144                            }
145                    }
146                    catch (Exception e) {
147                            _log.error(e, e);
148    
149                            contentType = ContentTypes.APPLICATION_OCTET_STREAM;
150                    }
151                    finally {
152                            StreamUtil.cleanUp(tikaInputStream);
153                    }
154    
155                    return contentType;
156            }
157    
158            @Override
159            public String getContentType(String fileName) {
160                    if (Validator.isNull(fileName)) {
161                            return ContentTypes.APPLICATION_OCTET_STREAM;
162                    }
163    
164                    try {
165                            Metadata metadata = new Metadata();
166    
167                            metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
168    
169                            MediaType mediaType = _detector.detect(null, metadata);
170    
171                            String contentType = mediaType.toString();
172    
173                            if (!contentType.contains("tika")) {
174                                    return contentType;
175                            }
176                            else if (_log.isDebugEnabled()) {
177                                    _log.debug("Retrieved invalid content type " + contentType);
178                            }
179                    }
180                    catch (Exception e) {
181                            _log.error(e, e);
182                    }
183    
184                    return ContentTypes.APPLICATION_OCTET_STREAM;
185            }
186    
187            @Override
188            public String getExtensionContentType(String extension) {
189                    if (Validator.isNull(extension)) {
190                            return ContentTypes.APPLICATION_OCTET_STREAM;
191                    }
192    
193                    return getContentType("A.".concat(extension));
194            }
195    
196            @Override
197            public Set<String> getExtensions(String contentType) {
198                    Set<String> extensions = _extensionsMap.get(contentType);
199    
200                    if (extensions == null) {
201                            extensions = Collections.emptySet();
202                    }
203    
204                    return extensions;
205            }
206    
207            @Override
208            public boolean isWebImage(String mimeType) {
209                    return _webImageMimeTypes.contains(mimeType);
210            }
211    
212            protected void read(InputStream stream) throws Exception {
213                    DocumentBuilderFactory documentBuilderFactory =
214                            DocumentBuilderFactory.newInstance();
215    
216                    DocumentBuilder documentBuilder =
217                            documentBuilderFactory.newDocumentBuilder();
218    
219                    Document document = documentBuilder.parse(new InputSource(stream));
220    
221                    Element element = document.getDocumentElement();
222    
223                    if ((element == null) || !MIME_INFO_TAG.equals(element.getTagName())) {
224                            throw new SystemException("Invalid configuration file");
225                    }
226    
227                    NodeList nodeList = element.getChildNodes();
228    
229                    for (int i = 0; i < nodeList.getLength(); i++) {
230                            Node node = nodeList.item(i);
231    
232                            if (node.getNodeType() != Node.ELEMENT_NODE) {
233                                    continue;
234                            }
235    
236                            Element childElement = (Element)node;
237    
238                            if (MIME_TYPE_TAG.equals(childElement.getTagName())) {
239                                    readMimeType(childElement);
240                            }
241                    }
242            }
243    
244            protected void readMimeType(Element element) {
245                    Set<String> mimeTypes = new HashSet<String>();
246    
247                    Set<String> extensions = new HashSet<String>();
248    
249                    String name = element.getAttribute(MIME_TYPE_TYPE_ATTR);
250    
251                    mimeTypes.add(name);
252    
253                    NodeList nodeList = element.getChildNodes();
254    
255                    for (int i = 0; i < nodeList.getLength(); i++) {
256                            Node node = nodeList.item(i);
257    
258                            if (node.getNodeType() != Node.ELEMENT_NODE) {
259                                    continue;
260                            }
261    
262                            Element childElement = (Element)node;
263    
264                            if (ALIAS_TAG.equals(childElement.getTagName())) {
265                                    String alias = childElement.getAttribute(ALIAS_TYPE_ATTR);
266    
267                                    mimeTypes.add(alias);
268                            }
269                            else if (GLOB_TAG.equals(childElement.getTagName())) {
270                                    boolean isRegex = GetterUtil.getBoolean(
271                                            childElement.getAttribute(ISREGEX_ATTR));
272    
273                                    if (isRegex) {
274                                            continue;
275                                    }
276    
277                                    String pattern = childElement.getAttribute(PATTERN_ATTR);
278    
279                                    if (!pattern.startsWith("*")) {
280                                            continue;
281                                    }
282    
283                                    String extension = pattern.substring(1);
284    
285                                    if (!extension.contains("*") && !extension.contains("?") &&
286                                            !extension.contains("[")) {
287    
288                                            extensions.add(extension);
289                                    }
290                            }
291                    }
292    
293                    for (String mimeType : mimeTypes) {
294                            _extensionsMap.put(mimeType, extensions);
295                    }
296            }
297    
298            private static Log _log = LogFactoryUtil.getLog(MimeTypesImpl.class);
299    
300            private Detector _detector;
301            private Map<String, Set<String>> _extensionsMap =
302                    new HashMap<String, Set<String>>();
303            private Set<String> _webImageMimeTypes = new HashSet<String>();
304    
305    }