001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.util;
016    
017    import com.liferay.portal.kernel.exception.SystemException;
018    import com.liferay.portal.kernel.log.Log;
019    import com.liferay.portal.kernel.log.LogFactoryUtil;
020    import com.liferay.portal.kernel.util.ContentTypes;
021    import com.liferay.portal.kernel.util.GetterUtil;
022    import com.liferay.portal.kernel.util.MimeTypes;
023    import com.liferay.portal.kernel.util.SetUtil;
024    import com.liferay.portal.kernel.util.StreamUtil;
025    import com.liferay.portal.kernel.util.Validator;
026    
027    import java.io.File;
028    import java.io.FileNotFoundException;
029    import java.io.InputStream;
030    
031    import java.net.URL;
032    
033    import java.util.Collections;
034    import java.util.HashMap;
035    import java.util.HashSet;
036    import java.util.Map;
037    import java.util.Set;
038    
039    import javax.xml.parsers.DocumentBuilder;
040    import javax.xml.parsers.DocumentBuilderFactory;
041    
042    import org.apache.tika.detect.DefaultDetector;
043    import org.apache.tika.detect.Detector;
044    import org.apache.tika.io.CloseShieldInputStream;
045    import org.apache.tika.io.TikaInputStream;
046    import org.apache.tika.metadata.Metadata;
047    import org.apache.tika.mime.MediaType;
048    import org.apache.tika.mime.MimeTypesReaderMetKeys;
049    
050    import org.w3c.dom.Document;
051    import org.w3c.dom.Element;
052    import org.w3c.dom.Node;
053    import org.w3c.dom.NodeList;
054    
055    import org.xml.sax.InputSource;
056    
057    /**
058     * @author Jorge Ferrer
059     * @author Brian Wing Shun Chan
060     * @author Alexander Chow
061     */
062    public class MimeTypesImpl implements MimeTypes, MimeTypesReaderMetKeys {
063    
064            public MimeTypesImpl() {
065                    _detector = new DefaultDetector(
066                            org.apache.tika.mime.MimeTypes.getDefaultMimeTypes());
067    
068                    _webImageMimeTypes = SetUtil.fromArray(
069                            PropsValues.MIME_TYPES_WEB_IMAGES);
070    
071                    URL url = org.apache.tika.mime.MimeTypes.class.getResource(
072                            "tika-mimetypes.xml");
073    
074                    try {
075                            read(url.openStream());
076                    }
077                    catch (Exception e) {
078                            _log.error("Unable to populate extensions map", e);
079                    }
080            }
081    
082            @Override
083            public String getContentType(File file) {
084                    return getContentType(file, file.getName());
085            }
086    
087            @Override
088            public String getContentType(File file, String fileName) {
089                    if ((file == null) || !file.exists()) {
090                            return getContentType(fileName);
091                    }
092    
093                    InputStream is = null;
094    
095                    try {
096                            is = TikaInputStream.get(file);
097    
098                            return getContentType(is, fileName);
099                    }
100                    catch (FileNotFoundException fnfe) {
101                            return getContentType(fileName);
102                    }
103                    finally {
104                            StreamUtil.cleanUp(is);
105                    }
106            }
107    
108            @Override
109            public String getContentType(InputStream inputStream, String fileName) {
110                    if (inputStream == null) {
111                            return getContentType(fileName);
112                    }
113    
114                    String contentType = null;
115    
116                    try {
117                            CloseShieldInputStream closeShieldInputStream =
118                                    new CloseShieldInputStream(inputStream);
119    
120                            Metadata metadata = new Metadata();
121    
122                            metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
123    
124                            MediaType mediaType = _detector.detect(
125                                    TikaInputStream.get(closeShieldInputStream), metadata);
126    
127                            contentType = mediaType.toString();
128    
129                            if (contentType.contains("tika")) {
130                                    if (_log.isDebugEnabled()) {
131                                            _log.debug("Retrieved invalid content type " + contentType);
132                                    }
133    
134                                    contentType = getContentType(fileName);
135                            }
136    
137                            if (contentType.contains("tika")) {
138                                    if (_log.isDebugEnabled()) {
139                                            _log.debug("Retrieved invalid content type " + contentType);
140                                    }
141    
142                                    contentType = ContentTypes.APPLICATION_OCTET_STREAM;
143                            }
144                    }
145                    catch (Exception e) {
146                            _log.error(e, e);
147    
148                            contentType = ContentTypes.APPLICATION_OCTET_STREAM;
149                    }
150    
151                    return contentType;
152            }
153    
154            @Override
155            public String getContentType(String fileName) {
156                    if (Validator.isNull(fileName)) {
157                            return ContentTypes.APPLICATION_OCTET_STREAM;
158                    }
159    
160                    try {
161                            Metadata metadata = new Metadata();
162    
163                            metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
164    
165                            MediaType mediaType = _detector.detect(null, metadata);
166    
167                            String contentType = mediaType.toString();
168    
169                            if (!contentType.contains("tika")) {
170                                    return contentType;
171                            }
172                            else if (_log.isDebugEnabled()) {
173                                    _log.debug("Retrieved invalid content type " + contentType);
174                            }
175                    }
176                    catch (Exception e) {
177                            _log.error(e, e);
178                    }
179    
180                    return ContentTypes.APPLICATION_OCTET_STREAM;
181            }
182    
183            @Override
184            public String getExtensionContentType(String extension) {
185                    if (Validator.isNull(extension)) {
186                            return ContentTypes.APPLICATION_OCTET_STREAM;
187                    }
188    
189                    return getContentType("A.".concat(extension));
190            }
191    
192            @Override
193            public Set<String> getExtensions(String contentType) {
194                    Set<String> extensions = _extensionsMap.get(contentType);
195    
196                    if (extensions == null) {
197                            extensions = Collections.emptySet();
198                    }
199    
200                    return extensions;
201            }
202    
203            @Override
204            public boolean isWebImage(String mimeType) {
205                    return _webImageMimeTypes.contains(mimeType);
206            }
207    
208            protected void read(InputStream stream) throws Exception {
209                    DocumentBuilderFactory documentBuilderFactory =
210                            DocumentBuilderFactory.newInstance();
211    
212                    DocumentBuilder documentBuilder =
213                            documentBuilderFactory.newDocumentBuilder();
214    
215                    Document document = documentBuilder.parse(new InputSource(stream));
216    
217                    Element element = document.getDocumentElement();
218    
219                    if ((element == null) || !MIME_INFO_TAG.equals(element.getTagName())) {
220                            throw new SystemException("Invalid configuration file");
221                    }
222    
223                    NodeList nodeList = element.getChildNodes();
224    
225                    for (int i = 0; i < nodeList.getLength(); i++) {
226                            Node node = nodeList.item(i);
227    
228                            if (node.getNodeType() != Node.ELEMENT_NODE) {
229                                    continue;
230                            }
231    
232                            Element childElement = (Element)node;
233    
234                            if (MIME_TYPE_TAG.equals(childElement.getTagName())) {
235                                    readMimeType(childElement);
236                            }
237                    }
238            }
239    
240            protected void readMimeType(Element element) {
241                    Set<String> mimeTypes = new HashSet<String>();
242    
243                    Set<String> extensions = new HashSet<String>();
244    
245                    String name = element.getAttribute(MIME_TYPE_TYPE_ATTR);
246    
247                    mimeTypes.add(name);
248    
249                    NodeList nodeList = element.getChildNodes();
250    
251                    for (int i = 0; i < nodeList.getLength(); i++) {
252                            Node node = nodeList.item(i);
253    
254                            if (node.getNodeType() != Node.ELEMENT_NODE) {
255                                    continue;
256                            }
257    
258                            Element childElement = (Element)node;
259    
260                            if (ALIAS_TAG.equals(childElement.getTagName())) {
261                                    String alias = childElement.getAttribute(ALIAS_TYPE_ATTR);
262    
263                                    mimeTypes.add(alias);
264                            }
265                            else if (GLOB_TAG.equals(childElement.getTagName())) {
266                                    boolean isRegex = GetterUtil.getBoolean(
267                                            childElement.getAttribute(ISREGEX_ATTR));
268    
269                                    if (isRegex) {
270                                            continue;
271                                    }
272    
273                                    String pattern = childElement.getAttribute(PATTERN_ATTR);
274    
275                                    if (!pattern.startsWith("*")) {
276                                            continue;
277                                    }
278    
279                                    String extension = pattern.substring(1);
280    
281                                    if (!extension.contains("*") && !extension.contains("?") &&
282                                            !extension.contains("[")) {
283    
284                                            extensions.add(extension);
285                                    }
286                            }
287                    }
288    
289                    for (String mimeType : mimeTypes) {
290                            _extensionsMap.put(mimeType, extensions);
291                    }
292            }
293    
294            private static Log _log = LogFactoryUtil.getLog(MimeTypesImpl.class);
295    
296            private Detector _detector;
297            private Map<String, Set<String>> _extensionsMap =
298                    new HashMap<String, Set<String>>();
299            private Set<String> _webImageMimeTypes = new HashSet<String>();
300    
301    }