001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.exception.SystemException;
018 import com.liferay.portal.kernel.log.Log;
019 import com.liferay.portal.kernel.log.LogFactoryUtil;
020 import com.liferay.portal.kernel.util.ContentTypes;
021 import com.liferay.portal.kernel.util.GetterUtil;
022 import com.liferay.portal.kernel.util.MimeTypes;
023 import com.liferay.portal.kernel.util.SetUtil;
024 import com.liferay.portal.kernel.util.StreamUtil;
025 import com.liferay.portal.kernel.util.Validator;
026
027 import java.io.File;
028 import java.io.FileNotFoundException;
029 import java.io.InputStream;
030
031 import java.net.URL;
032
033 import java.util.Collections;
034 import java.util.HashMap;
035 import java.util.HashSet;
036 import java.util.Map;
037 import java.util.Set;
038
039 import javax.xml.parsers.DocumentBuilder;
040 import javax.xml.parsers.DocumentBuilderFactory;
041
042 import org.apache.tika.detect.DefaultDetector;
043 import org.apache.tika.detect.Detector;
044 import org.apache.tika.io.CloseShieldInputStream;
045 import org.apache.tika.io.TikaInputStream;
046 import org.apache.tika.metadata.Metadata;
047 import org.apache.tika.mime.MediaType;
048 import org.apache.tika.mime.MimeTypesReaderMetKeys;
049
050 import org.w3c.dom.Document;
051 import org.w3c.dom.Element;
052 import org.w3c.dom.Node;
053 import org.w3c.dom.NodeList;
054
055 import org.xml.sax.InputSource;
056
057
062 public class MimeTypesImpl implements MimeTypes, MimeTypesReaderMetKeys {
063
064 public MimeTypesImpl() {
065 _detector = new DefaultDetector(
066 org.apache.tika.mime.MimeTypes.getDefaultMimeTypes());
067
068 _webImageMimeTypes = SetUtil.fromArray(
069 PropsValues.MIME_TYPES_WEB_IMAGES);
070
071 URL url = org.apache.tika.mime.MimeTypes.class.getResource(
072 "tika-mimetypes.xml");
073
074 try {
075 read(url.openStream());
076 }
077 catch (Exception e) {
078 _log.error("Unable to populate extensions map", e);
079 }
080 }
081
082 public String getContentType(File file) {
083 return getContentType(file, file.getName());
084 }
085
086 public String getContentType(File file, String fileName) {
087 if ((file == null) || !file.exists()) {
088 return getContentType(fileName);
089 }
090
091 InputStream is = null;
092
093 try {
094 is = TikaInputStream.get(file);
095
096 return getContentType(is, fileName);
097 }
098 catch (FileNotFoundException fnfe) {
099 return getContentType(fileName);
100 }
101 finally {
102 StreamUtil.cleanUp(is);
103 }
104 }
105
106 public String getContentType(InputStream inputStream, String fileName) {
107 if (inputStream == null) {
108 return getContentType(fileName);
109 }
110
111 String contentType = null;
112
113 try {
114 CloseShieldInputStream closeShieldInputStream =
115 new CloseShieldInputStream(inputStream);
116
117 Metadata metadata = new Metadata();
118
119 metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
120
121 MediaType mediaType = _detector.detect(
122 TikaInputStream.get(closeShieldInputStream), metadata);
123
124 contentType = mediaType.toString();
125
126 if (contentType.contains("tika")) {
127 if (_log.isDebugEnabled()) {
128 _log.debug("Retrieved invalid content type " + contentType);
129 }
130
131 contentType = getContentType(fileName);
132 }
133
134 if (contentType.contains("tika")) {
135 if (_log.isDebugEnabled()) {
136 _log.debug("Retrieved invalid content type " + contentType);
137 }
138
139 contentType = ContentTypes.APPLICATION_OCTET_STREAM;
140 }
141 }
142 catch (Exception e) {
143 _log.error(e, e);
144
145 contentType = ContentTypes.APPLICATION_OCTET_STREAM;
146 }
147
148 return contentType;
149 }
150
151 public String getContentType(String fileName) {
152 if (Validator.isNull(fileName)) {
153 return ContentTypes.APPLICATION_OCTET_STREAM;
154 }
155
156 try {
157 Metadata metadata = new Metadata();
158
159 metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
160
161 MediaType mediaType = _detector.detect(null, metadata);
162
163 String contentType = mediaType.toString();
164
165 if (!contentType.contains("tika")) {
166 return contentType;
167 }
168 else if (_log.isDebugEnabled()) {
169 _log.debug("Retrieved invalid content type " + contentType);
170 }
171 }
172 catch (Exception e) {
173 _log.error(e, e);
174 }
175
176 return ContentTypes.APPLICATION_OCTET_STREAM;
177 }
178
179 public String getExtensionContentType(String extension) {
180 if (Validator.isNull(extension)) {
181 return ContentTypes.APPLICATION_OCTET_STREAM;
182 }
183
184 return getContentType("A.".concat(extension));
185 }
186
187 public Set<String> getExtensions(String contentType) {
188 Set<String> extensions = _extensionsMap.get(contentType);
189
190 if (extensions == null) {
191 extensions = Collections.emptySet();
192 }
193
194 return extensions;
195 }
196
197 public boolean isWebImage(String mimeType) {
198 return _webImageMimeTypes.contains(mimeType);
199 }
200
201 protected void read(InputStream stream) throws Exception {
202 DocumentBuilderFactory documentBuilderFactory =
203 DocumentBuilderFactory.newInstance();
204
205 DocumentBuilder documentBuilder =
206 documentBuilderFactory.newDocumentBuilder();
207
208 Document document = documentBuilder.parse(new InputSource(stream));
209
210 Element element = document.getDocumentElement();
211
212 if ((element == null) || !MIME_INFO_TAG.equals(element.getTagName())) {
213 throw new SystemException("Invalid configuration file");
214 }
215
216 NodeList nodeList = element.getChildNodes();
217
218 for (int i = 0; i < nodeList.getLength(); i++) {
219 Node node = nodeList.item(i);
220
221 if (node.getNodeType() != Node.ELEMENT_NODE) {
222 continue;
223 }
224
225 Element childElement = (Element)node;
226
227 if (MIME_TYPE_TAG.equals(childElement.getTagName())) {
228 readMimeType(childElement);
229 }
230 }
231 }
232
233 protected void readMimeType(Element element) {
234 Set<String> mimeTypes = new HashSet<String>();
235
236 Set<String> extensions = new HashSet<String>();
237
238 String name = element.getAttribute(MIME_TYPE_TYPE_ATTR);
239
240 mimeTypes.add(name);
241
242 NodeList nodeList = element.getChildNodes();
243
244 for (int i = 0; i < nodeList.getLength(); i++) {
245 Node node = nodeList.item(i);
246
247 if (node.getNodeType() != Node.ELEMENT_NODE) {
248 continue;
249 }
250
251 Element childElement = (Element)node;
252
253 if (ALIAS_TAG.equals(childElement.getTagName())) {
254 String alias = childElement.getAttribute(ALIAS_TYPE_ATTR);
255
256 mimeTypes.add(alias);
257 }
258 else if (GLOB_TAG.equals(childElement.getTagName())) {
259 boolean isRegex = GetterUtil.getBoolean(
260 childElement.getAttribute(ISREGEX_ATTR));
261
262 if (isRegex) {
263 continue;
264 }
265
266 String pattern = childElement.getAttribute(PATTERN_ATTR);
267
268 if (!pattern.startsWith("*")) {
269 continue;
270 }
271
272 String extension = pattern.substring(1);
273
274 if (!extension.contains("*") && !extension.contains("?") &&
275 !extension.contains("[")) {
276
277 extensions.add(extension);
278 }
279 }
280 }
281
282 for (String mimeType : mimeTypes) {
283 _extensionsMap.put(mimeType, extensions);
284 }
285 }
286
287 private static Log _log = LogFactoryUtil.getLog(MimeTypesImpl.class);
288
289 private Detector _detector;
290 private Map<String, Set<String>> _extensionsMap =
291 new HashMap<String, Set<String>>();
292 private Set<String> _webImageMimeTypes = new HashSet<String>();
293
294 }