001    /**
002     * Copyright (c) 2000-2012 Liferay, Inc. All rights reserved.
003     *
004     * The contents of this file are subject to the terms of the Liferay Enterprise
005     * Subscription License ("License"). You may not use this file except in
006     * compliance with the License. You can obtain a copy of the License by
007     * contacting Liferay, Inc. See the License for the specific language governing
008     * permissions and limitations under the License, including but not limited to
009     * distribution rights of the Software.
010     *
011     *
012     *
013     */
014    
015    package com.liferay.portal.util;
016    
017    import com.liferay.portal.kernel.exception.SystemException;
018    import com.liferay.portal.kernel.log.Log;
019    import com.liferay.portal.kernel.log.LogFactoryUtil;
020    import com.liferay.portal.kernel.util.ContentTypes;
021    import com.liferay.portal.kernel.util.GetterUtil;
022    import com.liferay.portal.kernel.util.MimeTypes;
023    import com.liferay.portal.kernel.util.StreamUtil;
024    import com.liferay.portal.kernel.util.Validator;
025    
026    import java.io.File;
027    import java.io.FileNotFoundException;
028    import java.io.InputStream;
029    
030    import java.net.URL;
031    
032    import java.util.Collections;
033    import java.util.HashMap;
034    import java.util.HashSet;
035    import java.util.Map;
036    import java.util.Set;
037    
038    import javax.xml.parsers.DocumentBuilder;
039    import javax.xml.parsers.DocumentBuilderFactory;
040    
041    import org.apache.tika.detect.DefaultDetector;
042    import org.apache.tika.detect.Detector;
043    import org.apache.tika.io.TikaInputStream;
044    import org.apache.tika.metadata.Metadata;
045    import org.apache.tika.mime.MediaType;
046    import org.apache.tika.mime.MimeTypesReaderMetKeys;
047    
048    import org.w3c.dom.Document;
049    import org.w3c.dom.Element;
050    import org.w3c.dom.Node;
051    import org.w3c.dom.NodeList;
052    
053    import org.xml.sax.InputSource;
054    
055    /**
056     * @author Jorge Ferrer
057     * @author Brian Wing Shun Chan
058     * @author Alexander Chow
059     */
060    public class MimeTypesImpl implements MimeTypes, MimeTypesReaderMetKeys {
061    
062            public MimeTypesImpl() {
063                    _detector = new DefaultDetector(
064                            org.apache.tika.mime.MimeTypes.getDefaultMimeTypes());
065    
066                    URL url = org.apache.tika.mime.MimeTypes.class.getResource(
067                            "tika-mimetypes.xml");
068    
069                    try {
070                            read(url.openStream());
071                    }
072                    catch (Exception e) {
073                            _log.error("Unable to populate extensions map", e);
074                    }
075            }
076    
077            public String getContentType(File file) {
078                    return getContentType(file, file.getName());
079            }
080    
081            public String getContentType(File file, String fileName) {
082                    if ((file == null) || !file.exists()) {
083                            return getContentType(fileName);
084                    }
085    
086                    InputStream is = null;
087    
088                    try {
089                            is = TikaInputStream.get(file);
090    
091                            return getContentType(is, fileName);
092                    }
093                    catch (FileNotFoundException fnfe) {
094                            return getContentType(fileName);
095                    }
096                    finally {
097                            StreamUtil.cleanUp(is);
098                    }
099            }
100    
101            public String getContentType(InputStream inputStream, String fileName) {
102                    if (inputStream == null) {
103                            return getContentType(fileName);
104                    }
105    
106                    String contentType = null;
107    
108                    try {
109                            Metadata metadata = new Metadata();
110    
111                            metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
112    
113                            MediaType mediaType = _detector.detect(
114                                    TikaInputStream.get(inputStream), metadata);
115    
116                            contentType = mediaType.toString();
117    
118                            if (contentType.contains("tika")) {
119                                    if (_log.isDebugEnabled()) {
120                                            _log.debug("Retrieved invalid content type " + contentType);
121                                    }
122    
123                                    contentType = getContentType(fileName);
124                            }
125    
126                            if (contentType.contains("tika")) {
127                                    if (_log.isDebugEnabled()) {
128                                            _log.debug("Retrieved invalid content type " + contentType);
129                                    }
130    
131                                    contentType = ContentTypes.APPLICATION_OCTET_STREAM;
132                            }
133                    }
134                    catch (Exception e) {
135                            _log.error(e, e);
136    
137                            contentType = ContentTypes.APPLICATION_OCTET_STREAM;
138                    }
139    
140                    return contentType;
141            }
142    
143            public String getContentType(String fileName) {
144                    if (Validator.isNull(fileName)) {
145                            return ContentTypes.APPLICATION_OCTET_STREAM;
146                    }
147    
148                    try {
149                            Metadata metadata = new Metadata();
150    
151                            metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
152    
153                            MediaType mediaType = _detector.detect(null, metadata);
154    
155                            String contentType = mediaType.toString();
156    
157                            if (!contentType.contains("tika")) {
158                                    return contentType;
159                            }
160                            else if (_log.isDebugEnabled()) {
161                                    _log.debug("Retrieved invalid content type " + contentType);
162                            }
163                    }
164                    catch (Exception e) {
165                            _log.error(e, e);
166                    }
167    
168                    return ContentTypes.APPLICATION_OCTET_STREAM;
169            }
170    
171            public Set<String> getExtensions(String contentType) {
172                    Set<String> extensions = _extensionsMap.get(contentType);
173    
174                    if (extensions == null) {
175                            extensions = Collections.emptySet();
176                    }
177    
178                    return extensions;
179            }
180    
181            protected void read(InputStream stream) throws Exception {
182                    DocumentBuilderFactory documentBuilderFactory =
183                            DocumentBuilderFactory.newInstance();
184    
185                    DocumentBuilder documentBuilder =
186                            documentBuilderFactory.newDocumentBuilder();
187    
188                    Document document = documentBuilder.parse(new InputSource(stream));
189    
190                    Element element = document.getDocumentElement();
191    
192                    if ((element == null) || !MIME_INFO_TAG.equals(element.getTagName())) {
193                            throw new SystemException("Invalid configuration file");
194                    }
195    
196                    NodeList nodeList = element.getChildNodes();
197    
198                    for (int i = 0; i < nodeList.getLength(); i++) {
199                            Node node = nodeList.item(i);
200    
201                            if (node.getNodeType() != Node.ELEMENT_NODE) {
202                                    continue;
203                            }
204    
205                            Element childElement = (Element)node;
206    
207                            if (MIME_TYPE_TAG.equals(childElement.getTagName())) {
208                                    readMimeType(childElement);
209                            }
210                    }
211            }
212    
213            protected void readMimeType(Element element) {
214                    Set<String> mimeTypes = new HashSet<String>();
215    
216                    Set<String> extensions = new HashSet<String>();
217    
218                    String name = element.getAttribute(MIME_TYPE_TYPE_ATTR);
219    
220                    mimeTypes.add(name);
221    
222                    NodeList nodeList = element.getChildNodes();
223    
224                    for (int i = 0; i < nodeList.getLength(); i++) {
225                            Node node = nodeList.item(i);
226    
227                            if (node.getNodeType() != Node.ELEMENT_NODE) {
228                                    continue;
229                            }
230    
231                            Element childElement = (Element)node;
232    
233                            if (ALIAS_TAG.equals(childElement.getTagName())) {
234                                    String alias = childElement.getAttribute(ALIAS_TYPE_ATTR);
235    
236                                    mimeTypes.add(alias);
237                            }
238                            else if (GLOB_TAG.equals(childElement.getTagName())) {
239                                    boolean isRegex = GetterUtil.getBoolean(
240                                            childElement.getAttribute(ISREGEX_ATTR));
241    
242                                    if (isRegex) {
243                                            continue;
244                                    }
245    
246                                    String pattern = childElement.getAttribute(PATTERN_ATTR);
247    
248                                    if (!pattern.startsWith("*")) {
249                                            continue;
250                                    }
251    
252                                    String extension = pattern.substring(1);
253    
254                                    if (!extension.contains("*") && !extension.contains("?") &&
255                                            !extension.contains("[")) {
256    
257                                            extensions.add(extension);
258                                    }
259                            }
260                    }
261    
262                    for (String mimeType : mimeTypes) {
263                            _extensionsMap.put(mimeType, extensions);
264                    }
265            }
266    
267            private static Log _log = LogFactoryUtil.getLog(MimeTypesImpl.class);
268    
269            private Detector _detector;
270            private Map<String, Set<String>> _extensionsMap =
271                    new HashMap<String, Set<String>>();
272    
273    }