001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.exception.SystemException;
018 import com.liferay.portal.kernel.log.Log;
019 import com.liferay.portal.kernel.log.LogFactoryUtil;
020 import com.liferay.portal.kernel.util.ContentTypes;
021 import com.liferay.portal.kernel.util.GetterUtil;
022 import com.liferay.portal.kernel.util.MimeTypes;
023 import com.liferay.portal.kernel.util.SetUtil;
024 import com.liferay.portal.kernel.util.StreamUtil;
025 import com.liferay.portal.kernel.util.Validator;
026
027 import java.io.File;
028 import java.io.FileNotFoundException;
029 import java.io.InputStream;
030
031 import java.net.URL;
032
033 import java.util.Collections;
034 import java.util.HashMap;
035 import java.util.HashSet;
036 import java.util.Map;
037 import java.util.Set;
038
039 import javax.xml.parsers.DocumentBuilder;
040 import javax.xml.parsers.DocumentBuilderFactory;
041
042 import org.apache.tika.detect.DefaultDetector;
043 import org.apache.tika.detect.Detector;
044 import org.apache.tika.io.CloseShieldInputStream;
045 import org.apache.tika.io.TikaInputStream;
046 import org.apache.tika.metadata.Metadata;
047 import org.apache.tika.mime.MediaType;
048 import org.apache.tika.mime.MimeTypesReaderMetKeys;
049
050 import org.w3c.dom.Document;
051 import org.w3c.dom.Element;
052 import org.w3c.dom.Node;
053 import org.w3c.dom.NodeList;
054
055 import org.xml.sax.InputSource;
056
057
062 public class MimeTypesImpl implements MimeTypes, MimeTypesReaderMetKeys {
063
064 public MimeTypesImpl() {
065 _detector = new DefaultDetector(
066 org.apache.tika.mime.MimeTypes.getDefaultMimeTypes());
067
068 _webImageMimeTypes = SetUtil.fromArray(
069 PropsValues.MIME_TYPES_WEB_IMAGES);
070
071 URL url = org.apache.tika.mime.MimeTypes.class.getResource(
072 "tika-mimetypes.xml");
073
074 try {
075 read(url.openStream());
076 }
077 catch (Exception e) {
078 _log.error("Unable to populate extensions map", e);
079 }
080 }
081
082 public String getContentType(File file) {
083 return getContentType(file, file.getName());
084 }
085
086 public String getContentType(File file, String fileName) {
087 if ((file == null) || !file.exists()) {
088 return getContentType(fileName);
089 }
090
091 InputStream is = null;
092
093 try {
094 is = TikaInputStream.get(file);
095
096 return getContentType(is, fileName);
097 }
098 catch (FileNotFoundException fnfe) {
099 return getContentType(fileName);
100 }
101 finally {
102 StreamUtil.cleanUp(is);
103 }
104 }
105
106 public String getContentType(InputStream inputStream, String fileName) {
107 if (inputStream == null) {
108 return getContentType(fileName);
109 }
110
111 String contentType = null;
112
113 try {
114 CloseShieldInputStream closeShieldInputStream =
115 new CloseShieldInputStream(inputStream);
116
117 Metadata metadata = new Metadata();
118
119 metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
120
121 MediaType mediaType = _detector.detect(
122 TikaInputStream.get(closeShieldInputStream), metadata);
123
124 contentType = mediaType.toString();
125
126 if (contentType.contains("tika")) {
127 if (_log.isDebugEnabled()) {
128 _log.debug("Retrieved invalid content type " + contentType);
129 }
130
131 contentType = getContentType(fileName);
132 }
133
134 if (contentType.contains("tika")) {
135 if (_log.isDebugEnabled()) {
136 _log.debug("Retrieved invalid content type " + contentType);
137 }
138
139 contentType = ContentTypes.APPLICATION_OCTET_STREAM;
140 }
141 }
142 catch (Exception e) {
143 _log.error(e, e);
144
145 contentType = ContentTypes.APPLICATION_OCTET_STREAM;
146 }
147
148 return contentType;
149 }
150
151 public String getContentType(String fileName) {
152 if (Validator.isNull(fileName)) {
153 return ContentTypes.APPLICATION_OCTET_STREAM;
154 }
155
156 try {
157 Metadata metadata = new Metadata();
158
159 metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
160
161 MediaType mediaType = _detector.detect(null, metadata);
162
163 String contentType = mediaType.toString();
164
165 if (!contentType.contains("tika")) {
166 return contentType;
167 }
168 else if (_log.isDebugEnabled()) {
169 _log.debug("Retrieved invalid content type " + contentType);
170 }
171 }
172 catch (Exception e) {
173 _log.error(e, e);
174 }
175
176 return ContentTypes.APPLICATION_OCTET_STREAM;
177 }
178
179 public Set<String> getExtensions(String contentType) {
180 Set<String> extensions = _extensionsMap.get(contentType);
181
182 if (extensions == null) {
183 extensions = Collections.emptySet();
184 }
185
186 return extensions;
187 }
188
189 public boolean isWebImage(String mimeType) {
190 return _webImageMimeTypes.contains(mimeType);
191 }
192
193 protected void read(InputStream stream) throws Exception {
194 DocumentBuilderFactory documentBuilderFactory =
195 DocumentBuilderFactory.newInstance();
196
197 DocumentBuilder documentBuilder =
198 documentBuilderFactory.newDocumentBuilder();
199
200 Document document = documentBuilder.parse(new InputSource(stream));
201
202 Element element = document.getDocumentElement();
203
204 if ((element == null) || !MIME_INFO_TAG.equals(element.getTagName())) {
205 throw new SystemException("Invalid configuration file");
206 }
207
208 NodeList nodeList = element.getChildNodes();
209
210 for (int i = 0; i < nodeList.getLength(); i++) {
211 Node node = nodeList.item(i);
212
213 if (node.getNodeType() != Node.ELEMENT_NODE) {
214 continue;
215 }
216
217 Element childElement = (Element)node;
218
219 if (MIME_TYPE_TAG.equals(childElement.getTagName())) {
220 readMimeType(childElement);
221 }
222 }
223 }
224
225 protected void readMimeType(Element element) {
226 Set<String> mimeTypes = new HashSet<String>();
227
228 Set<String> extensions = new HashSet<String>();
229
230 String name = element.getAttribute(MIME_TYPE_TYPE_ATTR);
231
232 mimeTypes.add(name);
233
234 NodeList nodeList = element.getChildNodes();
235
236 for (int i = 0; i < nodeList.getLength(); i++) {
237 Node node = nodeList.item(i);
238
239 if (node.getNodeType() != Node.ELEMENT_NODE) {
240 continue;
241 }
242
243 Element childElement = (Element)node;
244
245 if (ALIAS_TAG.equals(childElement.getTagName())) {
246 String alias = childElement.getAttribute(ALIAS_TYPE_ATTR);
247
248 mimeTypes.add(alias);
249 }
250 else if (GLOB_TAG.equals(childElement.getTagName())) {
251 boolean isRegex = GetterUtil.getBoolean(
252 childElement.getAttribute(ISREGEX_ATTR));
253
254 if (isRegex) {
255 continue;
256 }
257
258 String pattern = childElement.getAttribute(PATTERN_ATTR);
259
260 if (!pattern.startsWith("*")) {
261 continue;
262 }
263
264 String extension = pattern.substring(1);
265
266 if (!extension.contains("*") && !extension.contains("?") &&
267 !extension.contains("[")) {
268
269 extensions.add(extension);
270 }
271 }
272 }
273
274 for (String mimeType : mimeTypes) {
275 _extensionsMap.put(mimeType, extensions);
276 }
277 }
278
279 private static Log _log = LogFactoryUtil.getLog(MimeTypesImpl.class);
280
281 private Detector _detector;
282 private Map<String, Set<String>> _extensionsMap =
283 new HashMap<String, Set<String>>();
284 private Set<String> _webImageMimeTypes = new HashSet<String>();
285
286 }