001    /**
002     * Copyright (c) 2000-2012 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.util;
016    
017    import com.liferay.portal.kernel.exception.SystemException;
018    import com.liferay.portal.kernel.log.Log;
019    import com.liferay.portal.kernel.log.LogFactoryUtil;
020    import com.liferay.portal.kernel.util.ContentTypes;
021    import com.liferay.portal.kernel.util.GetterUtil;
022    import com.liferay.portal.kernel.util.MimeTypes;
023    import com.liferay.portal.kernel.util.SetUtil;
024    import com.liferay.portal.kernel.util.StreamUtil;
025    import com.liferay.portal.kernel.util.Validator;
026    
027    import java.io.File;
028    import java.io.FileNotFoundException;
029    import java.io.InputStream;
030    
031    import java.net.URL;
032    
033    import java.util.Collections;
034    import java.util.HashMap;
035    import java.util.HashSet;
036    import java.util.Map;
037    import java.util.Set;
038    
039    import javax.xml.parsers.DocumentBuilder;
040    import javax.xml.parsers.DocumentBuilderFactory;
041    
042    import org.apache.tika.detect.DefaultDetector;
043    import org.apache.tika.detect.Detector;
044    import org.apache.tika.io.CloseShieldInputStream;
045    import org.apache.tika.io.TikaInputStream;
046    import org.apache.tika.metadata.Metadata;
047    import org.apache.tika.mime.MediaType;
048    import org.apache.tika.mime.MimeTypesReaderMetKeys;
049    
050    import org.w3c.dom.Document;
051    import org.w3c.dom.Element;
052    import org.w3c.dom.Node;
053    import org.w3c.dom.NodeList;
054    
055    import org.xml.sax.InputSource;
056    
057    /**
058     * @author Jorge Ferrer
059     * @author Brian Wing Shun Chan
060     * @author Alexander Chow
061     */
062    public class MimeTypesImpl implements MimeTypes, MimeTypesReaderMetKeys {
063    
064            public MimeTypesImpl() {
065                    _detector = new DefaultDetector(
066                            org.apache.tika.mime.MimeTypes.getDefaultMimeTypes());
067    
068                    _webImageMimeTypes = SetUtil.fromArray(
069                            PropsValues.MIME_TYPES_WEB_IMAGES);
070    
071                    URL url = org.apache.tika.mime.MimeTypes.class.getResource(
072                            "tika-mimetypes.xml");
073    
074                    try {
075                            read(url.openStream());
076                    }
077                    catch (Exception e) {
078                            _log.error("Unable to populate extensions map", e);
079                    }
080            }
081    
082            public String getContentType(File file) {
083                    return getContentType(file, file.getName());
084            }
085    
086            public String getContentType(File file, String fileName) {
087                    if ((file == null) || !file.exists()) {
088                            return getContentType(fileName);
089                    }
090    
091                    InputStream is = null;
092    
093                    try {
094                            is = TikaInputStream.get(file);
095    
096                            return getContentType(is, fileName);
097                    }
098                    catch (FileNotFoundException fnfe) {
099                            return getContentType(fileName);
100                    }
101                    finally {
102                            StreamUtil.cleanUp(is);
103                    }
104            }
105    
106            public String getContentType(InputStream inputStream, String fileName) {
107                    if (inputStream == null) {
108                            return getContentType(fileName);
109                    }
110    
111                    String contentType = null;
112    
113                    try {
114                            CloseShieldInputStream closeShieldInputStream =
115                                    new CloseShieldInputStream(inputStream);
116    
117                            Metadata metadata = new Metadata();
118    
119                            metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
120    
121                            MediaType mediaType = _detector.detect(
122                                    TikaInputStream.get(closeShieldInputStream), metadata);
123    
124                            contentType = mediaType.toString();
125    
126                            if (contentType.contains("tika")) {
127                                    if (_log.isDebugEnabled()) {
128                                            _log.debug("Retrieved invalid content type " + contentType);
129                                    }
130    
131                                    contentType = getContentType(fileName);
132                            }
133    
134                            if (contentType.contains("tika")) {
135                                    if (_log.isDebugEnabled()) {
136                                            _log.debug("Retrieved invalid content type " + contentType);
137                                    }
138    
139                                    contentType = ContentTypes.APPLICATION_OCTET_STREAM;
140                            }
141                    }
142                    catch (Exception e) {
143                            _log.error(e, e);
144    
145                            contentType = ContentTypes.APPLICATION_OCTET_STREAM;
146                    }
147    
148                    return contentType;
149            }
150    
151            public String getContentType(String fileName) {
152                    if (Validator.isNull(fileName)) {
153                            return ContentTypes.APPLICATION_OCTET_STREAM;
154                    }
155    
156                    try {
157                            Metadata metadata = new Metadata();
158    
159                            metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
160    
161                            MediaType mediaType = _detector.detect(null, metadata);
162    
163                            String contentType = mediaType.toString();
164    
165                            if (!contentType.contains("tika")) {
166                                    return contentType;
167                            }
168                            else if (_log.isDebugEnabled()) {
169                                    _log.debug("Retrieved invalid content type " + contentType);
170                            }
171                    }
172                    catch (Exception e) {
173                            _log.error(e, e);
174                    }
175    
176                    return ContentTypes.APPLICATION_OCTET_STREAM;
177            }
178    
179            public String getExtensionContentType(String extension) {
180                    if (Validator.isNull(extension)) {
181                            return ContentTypes.APPLICATION_OCTET_STREAM;
182                    }
183    
184                    return getContentType("A.".concat(extension));
185            }
186    
187            public Set<String> getExtensions(String contentType) {
188                    Set<String> extensions = _extensionsMap.get(contentType);
189    
190                    if (extensions == null) {
191                            extensions = Collections.emptySet();
192                    }
193    
194                    return extensions;
195            }
196    
197            public boolean isWebImage(String mimeType) {
198                    return _webImageMimeTypes.contains(mimeType);
199            }
200    
201            protected void read(InputStream stream) throws Exception {
202                    DocumentBuilderFactory documentBuilderFactory =
203                            DocumentBuilderFactory.newInstance();
204    
205                    DocumentBuilder documentBuilder =
206                            documentBuilderFactory.newDocumentBuilder();
207    
208                    Document document = documentBuilder.parse(new InputSource(stream));
209    
210                    Element element = document.getDocumentElement();
211    
212                    if ((element == null) || !MIME_INFO_TAG.equals(element.getTagName())) {
213                            throw new SystemException("Invalid configuration file");
214                    }
215    
216                    NodeList nodeList = element.getChildNodes();
217    
218                    for (int i = 0; i < nodeList.getLength(); i++) {
219                            Node node = nodeList.item(i);
220    
221                            if (node.getNodeType() != Node.ELEMENT_NODE) {
222                                    continue;
223                            }
224    
225                            Element childElement = (Element)node;
226    
227                            if (MIME_TYPE_TAG.equals(childElement.getTagName())) {
228                                    readMimeType(childElement);
229                            }
230                    }
231            }
232    
233            protected void readMimeType(Element element) {
234                    Set<String> mimeTypes = new HashSet<String>();
235    
236                    Set<String> extensions = new HashSet<String>();
237    
238                    String name = element.getAttribute(MIME_TYPE_TYPE_ATTR);
239    
240                    mimeTypes.add(name);
241    
242                    NodeList nodeList = element.getChildNodes();
243    
244                    for (int i = 0; i < nodeList.getLength(); i++) {
245                            Node node = nodeList.item(i);
246    
247                            if (node.getNodeType() != Node.ELEMENT_NODE) {
248                                    continue;
249                            }
250    
251                            Element childElement = (Element)node;
252    
253                            if (ALIAS_TAG.equals(childElement.getTagName())) {
254                                    String alias = childElement.getAttribute(ALIAS_TYPE_ATTR);
255    
256                                    mimeTypes.add(alias);
257                            }
258                            else if (GLOB_TAG.equals(childElement.getTagName())) {
259                                    boolean isRegex = GetterUtil.getBoolean(
260                                            childElement.getAttribute(ISREGEX_ATTR));
261    
262                                    if (isRegex) {
263                                            continue;
264                                    }
265    
266                                    String pattern = childElement.getAttribute(PATTERN_ATTR);
267    
268                                    if (!pattern.startsWith("*")) {
269                                            continue;
270                                    }
271    
272                                    String extension = pattern.substring(1);
273    
274                                    if (!extension.contains("*") && !extension.contains("?") &&
275                                            !extension.contains("[")) {
276    
277                                            extensions.add(extension);
278                                    }
279                            }
280                    }
281    
282                    for (String mimeType : mimeTypes) {
283                            _extensionsMap.put(mimeType, extensions);
284                    }
285            }
286    
287            private static Log _log = LogFactoryUtil.getLog(MimeTypesImpl.class);
288    
289            private Detector _detector;
290            private Map<String, Set<String>> _extensionsMap =
291                    new HashMap<String, Set<String>>();
292            private Set<String> _webImageMimeTypes = new HashSet<String>();
293    
294    }