001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.exception.SystemException;
018 import com.liferay.portal.kernel.log.Log;
019 import com.liferay.portal.kernel.log.LogFactoryUtil;
020 import com.liferay.portal.kernel.util.ContentTypes;
021 import com.liferay.portal.kernel.util.GetterUtil;
022 import com.liferay.portal.kernel.util.MimeTypes;
023 import com.liferay.portal.kernel.util.StreamUtil;
024 import com.liferay.portal.kernel.util.Validator;
025
026 import java.io.File;
027 import java.io.FileNotFoundException;
028 import java.io.InputStream;
029
030 import java.net.URL;
031
032 import java.util.Collections;
033 import java.util.HashMap;
034 import java.util.HashSet;
035 import java.util.Map;
036 import java.util.Set;
037
038 import javax.xml.parsers.DocumentBuilder;
039 import javax.xml.parsers.DocumentBuilderFactory;
040
041 import org.apache.tika.detect.DefaultDetector;
042 import org.apache.tika.detect.Detector;
043 import org.apache.tika.io.TikaInputStream;
044 import org.apache.tika.metadata.Metadata;
045 import org.apache.tika.mime.MediaType;
046 import org.apache.tika.mime.MimeTypesReaderMetKeys;
047
048 import org.w3c.dom.Document;
049 import org.w3c.dom.Element;
050 import org.w3c.dom.Node;
051 import org.w3c.dom.NodeList;
052
053 import org.xml.sax.InputSource;
054
055
060 public class MimeTypesImpl implements MimeTypes, MimeTypesReaderMetKeys {
061
062 public MimeTypesImpl() {
063 _detector = new DefaultDetector(
064 org.apache.tika.mime.MimeTypes.getDefaultMimeTypes());
065
066 URL url = org.apache.tika.mime.MimeTypes.class.getResource(
067 "tika-mimetypes.xml");
068
069 try {
070 read(url.openStream());
071 }
072 catch (Exception e) {
073 _log.error("Unable to populate extensions map", e);
074 }
075 }
076
077 public String getContentType(File file) {
078 return getContentType(file, file.getName());
079 }
080
081 public String getContentType(File file, String fileName) {
082 if ((file == null) || !file.exists()) {
083 return getContentType(fileName);
084 }
085
086 InputStream is = null;
087
088 try {
089 is = TikaInputStream.get(file);
090
091 return getContentType(is, fileName);
092 }
093 catch (FileNotFoundException fnfe) {
094 return getContentType(fileName);
095 }
096 finally {
097 StreamUtil.cleanUp(is);
098 }
099 }
100
101 public String getContentType(InputStream inputStream, String fileName) {
102 if (inputStream == null) {
103 return getContentType(fileName);
104 }
105
106 String contentType = null;
107
108 try {
109 Metadata metadata = new Metadata();
110
111 metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
112
113 MediaType mediaType = _detector.detect(
114 TikaInputStream.get(inputStream), metadata);
115
116 contentType = mediaType.toString();
117
118 if (contentType.contains("tika")) {
119 if (_log.isDebugEnabled()) {
120 _log.debug("Retrieved invalid content type " + contentType);
121 }
122
123 contentType = getContentType(fileName);
124 }
125
126 if (contentType.contains("tika")) {
127 if (_log.isDebugEnabled()) {
128 _log.debug("Retrieved invalid content type " + contentType);
129 }
130
131 contentType = ContentTypes.APPLICATION_OCTET_STREAM;
132 }
133 }
134 catch (Exception e) {
135 _log.error(e, e);
136
137 contentType = ContentTypes.APPLICATION_OCTET_STREAM;
138 }
139
140 return contentType;
141 }
142
143 public String getContentType(String fileName) {
144 if (Validator.isNull(fileName)) {
145 return ContentTypes.APPLICATION_OCTET_STREAM;
146 }
147
148 try {
149 Metadata metadata = new Metadata();
150
151 metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
152
153 MediaType mediaType = _detector.detect(null, metadata);
154
155 String contentType = mediaType.toString();
156
157 if (!contentType.contains("tika")) {
158 return contentType;
159 }
160 else if (_log.isDebugEnabled()) {
161 _log.debug("Retrieved invalid content type " + contentType);
162 }
163 }
164 catch (Exception e) {
165 _log.error(e, e);
166 }
167
168 return ContentTypes.APPLICATION_OCTET_STREAM;
169 }
170
171 public Set<String> getExtensions(String contentType) {
172 Set<String> extensions = _extensionsMap.get(contentType);
173
174 if (extensions == null) {
175 extensions = Collections.emptySet();
176 }
177
178 return extensions;
179 }
180
181 protected void read(InputStream stream) throws Exception {
182 DocumentBuilderFactory documentBuilderFactory =
183 DocumentBuilderFactory.newInstance();
184
185 DocumentBuilder documentBuilder =
186 documentBuilderFactory.newDocumentBuilder();
187
188 Document document = documentBuilder.parse(new InputSource(stream));
189
190 Element element = document.getDocumentElement();
191
192 if ((element == null) || !MIME_INFO_TAG.equals(element.getTagName())) {
193 throw new SystemException("Invalid configuration file");
194 }
195
196 NodeList nodeList = element.getChildNodes();
197
198 for (int i = 0; i < nodeList.getLength(); i++) {
199 Node node = nodeList.item(i);
200
201 if (node.getNodeType() != Node.ELEMENT_NODE) {
202 continue;
203 }
204
205 Element childElement = (Element)node;
206
207 if (MIME_TYPE_TAG.equals(childElement.getTagName())) {
208 readMimeType(childElement);
209 }
210 }
211 }
212
213 protected void readMimeType(Element element) {
214 Set<String> mimeTypes = new HashSet<String>();
215
216 Set<String> extensions = new HashSet<String>();
217
218 String name = element.getAttribute(MIME_TYPE_TYPE_ATTR);
219
220 mimeTypes.add(name);
221
222 NodeList nodeList = element.getChildNodes();
223
224 for (int i = 0; i < nodeList.getLength(); i++) {
225 Node node = nodeList.item(i);
226
227 if (node.getNodeType() != Node.ELEMENT_NODE) {
228 continue;
229 }
230
231 Element childElement = (Element)node;
232
233 if (ALIAS_TAG.equals(childElement.getTagName())) {
234 String alias = childElement.getAttribute(ALIAS_TYPE_ATTR);
235
236 mimeTypes.add(alias);
237 }
238 else if (GLOB_TAG.equals(childElement.getTagName())) {
239 boolean isRegex = GetterUtil.getBoolean(
240 childElement.getAttribute(ISREGEX_ATTR));
241
242 if (isRegex) {
243 continue;
244 }
245
246 String pattern = childElement.getAttribute(PATTERN_ATTR);
247
248 if (!pattern.startsWith("*")) {
249 continue;
250 }
251
252 String extension = pattern.substring(1);
253
254 if (!extension.contains("*") && !extension.contains("?") &&
255 !extension.contains("[")) {
256
257 extensions.add(extension);
258 }
259 }
260 }
261
262 for (String mimeType : mimeTypes) {
263 _extensionsMap.put(mimeType, extensions);
264 }
265 }
266
267 private static Log _log = LogFactoryUtil.getLog(MimeTypesImpl.class);
268
269 private Detector _detector;
270 private Map<String, Set<String>> _extensionsMap =
271 new HashMap<String, Set<String>>();
272
273 }