001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.exception.SystemException;
018 import com.liferay.portal.kernel.log.Log;
019 import com.liferay.portal.kernel.log.LogFactoryUtil;
020 import com.liferay.portal.kernel.util.ContentTypes;
021 import com.liferay.portal.kernel.util.GetterUtil;
022 import com.liferay.portal.kernel.util.MimeTypes;
023 import com.liferay.portal.kernel.util.StreamUtil;
024 import com.liferay.portal.kernel.util.Validator;
025
026 import java.io.File;
027 import java.io.FileNotFoundException;
028 import java.io.InputStream;
029
030 import java.net.URL;
031
032 import java.util.Collections;
033 import java.util.HashMap;
034 import java.util.HashSet;
035 import java.util.Map;
036 import java.util.Set;
037
038 import javax.xml.parsers.DocumentBuilder;
039 import javax.xml.parsers.DocumentBuilderFactory;
040
041 import org.apache.tika.detect.DefaultDetector;
042 import org.apache.tika.detect.Detector;
043 import org.apache.tika.io.TikaInputStream;
044 import org.apache.tika.metadata.Metadata;
045 import org.apache.tika.mime.MediaType;
046 import org.apache.tika.mime.MimeTypesReaderMetKeys;
047
048 import org.w3c.dom.Document;
049 import org.w3c.dom.Element;
050 import org.w3c.dom.Node;
051 import org.w3c.dom.NodeList;
052
053 import org.xml.sax.InputSource;
054
055
060 public class MimeTypesImpl implements MimeTypes, MimeTypesReaderMetKeys {
061
062 public MimeTypesImpl() {
063 _detector = new DefaultDetector(
064 org.apache.tika.mime.MimeTypes.getDefaultMimeTypes());
065
066 URL url = org.apache.tika.mime.MimeTypes.class.getResource(
067 "tika-mimetypes.xml");
068
069 try {
070 read(url.openStream());
071 }
072 catch (Exception e) {
073 _log.error("Unable to populate extensions map", e);
074 }
075 }
076
077 public String getContentType(File file) {
078 return getContentType(file, file.getName());
079 }
080
081 public String getContentType(File file, String title) {
082 InputStream is = null;
083
084 try {
085 is = TikaInputStream.get(file);
086
087 return getContentType(is, title);
088 }
089 catch (FileNotFoundException fnfe) {
090 return getContentType(title);
091 }
092 finally {
093 StreamUtil.cleanUp(is);
094 }
095 }
096
097 public String getContentType(InputStream inputStream, String fileName) {
098 if ((inputStream == null) && Validator.isNull(fileName)) {
099 return ContentTypes.APPLICATION_OCTET_STREAM;
100 }
101
102 String contentType = null;
103
104 try {
105 Metadata metadata = new Metadata();
106
107 metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
108
109 MediaType mediaType = _detector.detect(
110 TikaInputStream.get(inputStream), metadata);
111
112 contentType = mediaType.toString();
113
114 if (contentType.contains("tika")) {
115 if (_log.isDebugEnabled()) {
116 _log.debug("Retrieved invalid content type " + contentType);
117 }
118
119 contentType = getContentType(fileName);
120 }
121
122 if (contentType.contains("tika")) {
123 if (_log.isDebugEnabled()) {
124 _log.debug("Retrieved invalid content type " + contentType);
125 }
126
127 contentType = ContentTypes.APPLICATION_OCTET_STREAM;
128 }
129 }
130 catch (Exception e) {
131 _log.error(e, e);
132
133 contentType = ContentTypes.APPLICATION_OCTET_STREAM;
134 }
135
136 return contentType;
137 }
138
139 public String getContentType(String fileName) {
140 if (Validator.isNull(fileName)) {
141 return ContentTypes.APPLICATION_OCTET_STREAM;
142 }
143
144 try {
145 Metadata metadata = new Metadata();
146
147 metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
148
149 MediaType mediaType = _detector.detect(null, metadata);
150
151 String contentType = mediaType.toString();
152
153 if (!contentType.contains("tika")) {
154 return contentType;
155 }
156 else if (_log.isDebugEnabled()) {
157 _log.debug("Retrieved invalid content type " + contentType);
158 }
159 }
160 catch (Exception e) {
161 _log.error(e, e);
162 }
163
164 return ContentTypes.APPLICATION_OCTET_STREAM;
165 }
166
167 public Set<String> getExtensions(String contentType) {
168 Set<String> extensions = _extensionsMap.get(contentType);
169
170 if (extensions == null) {
171 extensions = Collections.emptySet();
172 }
173
174 return extensions;
175 }
176
177 protected void read(InputStream stream) throws Exception {
178 DocumentBuilderFactory documentBuilderFactory =
179 DocumentBuilderFactory.newInstance();
180
181 DocumentBuilder documentBuilder =
182 documentBuilderFactory.newDocumentBuilder();
183
184 Document document = documentBuilder.parse(new InputSource(stream));
185
186 Element element = document.getDocumentElement();
187
188 if ((element == null) ||
189 !MIME_INFO_TAG.equals(element.getTagName())) {
190
191 throw new SystemException("Invalid configuration file");
192 }
193
194 NodeList nodeList = element.getChildNodes();
195
196 for (int i = 0; i < nodeList.getLength(); i++) {
197 Node node = nodeList.item(i);
198
199 if (node.getNodeType() != Node.ELEMENT_NODE) {
200 continue;
201 }
202
203 Element childElement = (Element)node;
204
205 if (MIME_TYPE_TAG.equals(childElement.getTagName())) {
206 readMimeType(childElement);
207 }
208 }
209 }
210
211 protected void readMimeType(Element element) {
212 Set<String> mimeTypes = new HashSet<String>();
213
214 Set<String> extensions = new HashSet<String>();
215
216 String name = element.getAttribute(MIME_TYPE_TYPE_ATTR);
217
218 mimeTypes.add(name);
219
220 NodeList nodeList = element.getChildNodes();
221
222 for (int i = 0; i < nodeList.getLength(); i++) {
223 Node node = nodeList.item(i);
224
225 if (node.getNodeType() != Node.ELEMENT_NODE) {
226 continue;
227 }
228
229 Element childElement = (Element)node;
230
231 if (ALIAS_TAG.equals(childElement.getTagName())) {
232 String alias = childElement.getAttribute(ALIAS_TYPE_ATTR);
233
234 mimeTypes.add(alias);
235 }
236 else if (GLOB_TAG.equals(childElement.getTagName())) {
237 boolean isRegex = GetterUtil.getBoolean(
238 childElement.getAttribute(ISREGEX_ATTR));
239
240 if (isRegex) {
241 continue;
242 }
243
244 String pattern = childElement.getAttribute(PATTERN_ATTR);
245
246 if (!pattern.startsWith("*")) {
247 continue;
248 }
249
250 String extension = pattern.substring(1);
251
252 if (!extension.contains("*") && !extension.contains("?") &&
253 !extension.contains("[")) {
254
255 extensions.add(extension);
256 }
257 }
258 }
259
260 for (String mimeType : mimeTypes) {
261 _extensionsMap.put(mimeType, extensions);
262 }
263 }
264
265 private static Log _log = LogFactoryUtil.getLog(MimeTypesImpl.class);
266
267 private Detector _detector;
268 private Map<String, Set<String>> _extensionsMap =
269 new HashMap<String, Set<String>>();
270
271 }