001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.exception.SystemException;
018 import com.liferay.portal.kernel.log.Log;
019 import com.liferay.portal.kernel.log.LogFactoryUtil;
020 import com.liferay.portal.kernel.util.ContentTypes;
021 import com.liferay.portal.kernel.util.GetterUtil;
022 import com.liferay.portal.kernel.util.MimeTypes;
023 import com.liferay.portal.kernel.util.SetUtil;
024 import com.liferay.portal.kernel.util.StreamUtil;
025 import com.liferay.portal.kernel.util.Validator;
026
027 import java.io.File;
028 import java.io.FileNotFoundException;
029 import java.io.InputStream;
030
031 import java.net.URL;
032
033 import java.util.Collections;
034 import java.util.HashMap;
035 import java.util.HashSet;
036 import java.util.Map;
037 import java.util.Set;
038
039 import javax.xml.parsers.DocumentBuilder;
040 import javax.xml.parsers.DocumentBuilderFactory;
041
042 import org.apache.tika.detect.DefaultDetector;
043 import org.apache.tika.detect.Detector;
044 import org.apache.tika.io.CloseShieldInputStream;
045 import org.apache.tika.io.TikaInputStream;
046 import org.apache.tika.metadata.Metadata;
047 import org.apache.tika.mime.MediaType;
048 import org.apache.tika.mime.MimeTypesReaderMetKeys;
049
050 import org.w3c.dom.Document;
051 import org.w3c.dom.Element;
052 import org.w3c.dom.Node;
053 import org.w3c.dom.NodeList;
054
055 import org.xml.sax.InputSource;
056
057
062 public class MimeTypesImpl implements MimeTypes, MimeTypesReaderMetKeys {
063
064 public MimeTypesImpl() {
065 _detector = new DefaultDetector(
066 org.apache.tika.mime.MimeTypes.getDefaultMimeTypes());
067
068 _webImageMimeTypes = SetUtil.fromArray(
069 PropsValues.MIME_TYPES_WEB_IMAGES);
070
071 URL url = org.apache.tika.mime.MimeTypes.class.getResource(
072 "tika-mimetypes.xml");
073
074 try {
075 read(url.openStream());
076 }
077 catch (Exception e) {
078 _log.error("Unable to populate extensions map", e);
079 }
080 }
081
082 @Override
083 public String getContentType(File file) {
084 return getContentType(file, file.getName());
085 }
086
087 @Override
088 public String getContentType(File file, String fileName) {
089 if ((file == null) || !file.exists()) {
090 return getContentType(fileName);
091 }
092
093 InputStream is = null;
094
095 try {
096 is = TikaInputStream.get(file);
097
098 return getContentType(is, fileName);
099 }
100 catch (FileNotFoundException fnfe) {
101 return getContentType(fileName);
102 }
103 finally {
104 StreamUtil.cleanUp(is);
105 }
106 }
107
108 @Override
109 public String getContentType(InputStream inputStream, String fileName) {
110 if (inputStream == null) {
111 return getContentType(fileName);
112 }
113
114 String contentType = null;
115
116 TikaInputStream tikaInputStream = null;
117
118 try {
119 tikaInputStream = TikaInputStream.get(
120 new CloseShieldInputStream(inputStream));
121
122 Metadata metadata = new Metadata();
123
124 metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
125
126 MediaType mediaType = _detector.detect(tikaInputStream, metadata);
127
128 contentType = mediaType.toString();
129
130 if (contentType.contains("tika")) {
131 if (_log.isDebugEnabled()) {
132 _log.debug("Retrieved invalid content type " + contentType);
133 }
134
135 contentType = getContentType(fileName);
136 }
137
138 if (contentType.contains("tika")) {
139 if (_log.isDebugEnabled()) {
140 _log.debug("Retrieved invalid content type " + contentType);
141 }
142
143 contentType = ContentTypes.APPLICATION_OCTET_STREAM;
144 }
145 }
146 catch (Exception e) {
147 _log.error(e, e);
148
149 contentType = ContentTypes.APPLICATION_OCTET_STREAM;
150 }
151 finally {
152 StreamUtil.cleanUp(tikaInputStream);
153 }
154
155 return contentType;
156 }
157
158 @Override
159 public String getContentType(String fileName) {
160 if (Validator.isNull(fileName)) {
161 return ContentTypes.APPLICATION_OCTET_STREAM;
162 }
163
164 try {
165 Metadata metadata = new Metadata();
166
167 metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
168
169 MediaType mediaType = _detector.detect(null, metadata);
170
171 String contentType = mediaType.toString();
172
173 if (!contentType.contains("tika")) {
174 return contentType;
175 }
176 else if (_log.isDebugEnabled()) {
177 _log.debug("Retrieved invalid content type " + contentType);
178 }
179 }
180 catch (Exception e) {
181 _log.error(e, e);
182 }
183
184 return ContentTypes.APPLICATION_OCTET_STREAM;
185 }
186
187 @Override
188 public String getExtensionContentType(String extension) {
189 if (Validator.isNull(extension)) {
190 return ContentTypes.APPLICATION_OCTET_STREAM;
191 }
192
193 return getContentType("A.".concat(extension));
194 }
195
196 @Override
197 public Set<String> getExtensions(String contentType) {
198 Set<String> extensions = _extensionsMap.get(contentType);
199
200 if (extensions == null) {
201 extensions = Collections.emptySet();
202 }
203
204 return extensions;
205 }
206
207 @Override
208 public boolean isWebImage(String mimeType) {
209 return _webImageMimeTypes.contains(mimeType);
210 }
211
212 protected void read(InputStream stream) throws Exception {
213 DocumentBuilderFactory documentBuilderFactory =
214 DocumentBuilderFactory.newInstance();
215
216 DocumentBuilder documentBuilder =
217 documentBuilderFactory.newDocumentBuilder();
218
219 Document document = documentBuilder.parse(new InputSource(stream));
220
221 Element element = document.getDocumentElement();
222
223 if ((element == null) || !MIME_INFO_TAG.equals(element.getTagName())) {
224 throw new SystemException("Invalid configuration file");
225 }
226
227 NodeList nodeList = element.getChildNodes();
228
229 for (int i = 0; i < nodeList.getLength(); i++) {
230 Node node = nodeList.item(i);
231
232 if (node.getNodeType() != Node.ELEMENT_NODE) {
233 continue;
234 }
235
236 Element childElement = (Element)node;
237
238 if (MIME_TYPE_TAG.equals(childElement.getTagName())) {
239 readMimeType(childElement);
240 }
241 }
242 }
243
244 protected void readMimeType(Element element) {
245 Set<String> mimeTypes = new HashSet<String>();
246
247 Set<String> extensions = new HashSet<String>();
248
249 String name = element.getAttribute(MIME_TYPE_TYPE_ATTR);
250
251 mimeTypes.add(name);
252
253 NodeList nodeList = element.getChildNodes();
254
255 for (int i = 0; i < nodeList.getLength(); i++) {
256 Node node = nodeList.item(i);
257
258 if (node.getNodeType() != Node.ELEMENT_NODE) {
259 continue;
260 }
261
262 Element childElement = (Element)node;
263
264 if (ALIAS_TAG.equals(childElement.getTagName())) {
265 String alias = childElement.getAttribute(ALIAS_TYPE_ATTR);
266
267 mimeTypes.add(alias);
268 }
269 else if (GLOB_TAG.equals(childElement.getTagName())) {
270 boolean isRegex = GetterUtil.getBoolean(
271 childElement.getAttribute(ISREGEX_ATTR));
272
273 if (isRegex) {
274 continue;
275 }
276
277 String pattern = childElement.getAttribute(PATTERN_ATTR);
278
279 if (!pattern.startsWith("*")) {
280 continue;
281 }
282
283 String extension = pattern.substring(1);
284
285 if (!extension.contains("*") && !extension.contains("?") &&
286 !extension.contains("[")) {
287
288 extensions.add(extension);
289 }
290 }
291 }
292
293 for (String mimeType : mimeTypes) {
294 _extensionsMap.put(mimeType, extensions);
295 }
296 }
297
298 private static Log _log = LogFactoryUtil.getLog(MimeTypesImpl.class);
299
300 private Detector _detector;
301 private Map<String, Set<String>> _extensionsMap =
302 new HashMap<String, Set<String>>();
303 private Set<String> _webImageMimeTypes = new HashSet<String>();
304
305 }