001
014
015 package com.liferay.portal.util;
016
017 import com.liferay.portal.kernel.exception.SystemException;
018 import com.liferay.portal.kernel.log.Log;
019 import com.liferay.portal.kernel.log.LogFactoryUtil;
020 import com.liferay.portal.kernel.util.ContentTypes;
021 import com.liferay.portal.kernel.util.GetterUtil;
022 import com.liferay.portal.kernel.util.MimeTypes;
023 import com.liferay.portal.kernel.util.SetUtil;
024 import com.liferay.portal.kernel.util.StreamUtil;
025 import com.liferay.portal.kernel.util.Validator;
026
027 import java.io.File;
028 import java.io.FileNotFoundException;
029 import java.io.InputStream;
030
031 import java.net.URL;
032
033 import java.util.Collections;
034 import java.util.HashMap;
035 import java.util.HashSet;
036 import java.util.Map;
037 import java.util.Set;
038
039 import javax.xml.parsers.DocumentBuilder;
040 import javax.xml.parsers.DocumentBuilderFactory;
041
042 import org.apache.tika.detect.DefaultDetector;
043 import org.apache.tika.detect.Detector;
044 import org.apache.tika.io.CloseShieldInputStream;
045 import org.apache.tika.io.TikaInputStream;
046 import org.apache.tika.metadata.Metadata;
047 import org.apache.tika.mime.MediaType;
048 import org.apache.tika.mime.MimeTypesReaderMetKeys;
049
050 import org.w3c.dom.Document;
051 import org.w3c.dom.Element;
052 import org.w3c.dom.Node;
053 import org.w3c.dom.NodeList;
054
055 import org.xml.sax.InputSource;
056
057
062 public class MimeTypesImpl implements MimeTypes, MimeTypesReaderMetKeys {
063
064 public MimeTypesImpl() {
065 _detector = new DefaultDetector(
066 org.apache.tika.mime.MimeTypes.getDefaultMimeTypes());
067
068 _webImageMimeTypes = SetUtil.fromArray(
069 PropsValues.MIME_TYPES_WEB_IMAGES);
070
071 URL url = org.apache.tika.mime.MimeTypes.class.getResource(
072 "tika-mimetypes.xml");
073
074 try {
075 read(url.openStream());
076 }
077 catch (Exception e) {
078 _log.error("Unable to populate extensions map", e);
079 }
080 }
081
082 @Override
083 public String getContentType(File file) {
084 return getContentType(file, file.getName());
085 }
086
087 @Override
088 public String getContentType(File file, String fileName) {
089 if ((file == null) || !file.exists()) {
090 return getContentType(fileName);
091 }
092
093 InputStream is = null;
094
095 try {
096 is = TikaInputStream.get(file);
097
098 return getContentType(is, fileName);
099 }
100 catch (FileNotFoundException fnfe) {
101 return getContentType(fileName);
102 }
103 finally {
104 StreamUtil.cleanUp(is);
105 }
106 }
107
108 @Override
109 public String getContentType(InputStream inputStream, String fileName) {
110 if (inputStream == null) {
111 return getContentType(fileName);
112 }
113
114 String contentType = null;
115
116 try {
117 CloseShieldInputStream closeShieldInputStream =
118 new CloseShieldInputStream(inputStream);
119
120 Metadata metadata = new Metadata();
121
122 metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
123
124 MediaType mediaType = _detector.detect(
125 TikaInputStream.get(closeShieldInputStream), metadata);
126
127 contentType = mediaType.toString();
128
129 if (contentType.contains("tika")) {
130 if (_log.isDebugEnabled()) {
131 _log.debug("Retrieved invalid content type " + contentType);
132 }
133
134 contentType = getContentType(fileName);
135 }
136
137 if (contentType.contains("tika")) {
138 if (_log.isDebugEnabled()) {
139 _log.debug("Retrieved invalid content type " + contentType);
140 }
141
142 contentType = ContentTypes.APPLICATION_OCTET_STREAM;
143 }
144 }
145 catch (Exception e) {
146 _log.error(e, e);
147
148 contentType = ContentTypes.APPLICATION_OCTET_STREAM;
149 }
150
151 return contentType;
152 }
153
154 @Override
155 public String getContentType(String fileName) {
156 if (Validator.isNull(fileName)) {
157 return ContentTypes.APPLICATION_OCTET_STREAM;
158 }
159
160 try {
161 Metadata metadata = new Metadata();
162
163 metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
164
165 MediaType mediaType = _detector.detect(null, metadata);
166
167 String contentType = mediaType.toString();
168
169 if (!contentType.contains("tika")) {
170 return contentType;
171 }
172 else if (_log.isDebugEnabled()) {
173 _log.debug("Retrieved invalid content type " + contentType);
174 }
175 }
176 catch (Exception e) {
177 _log.error(e, e);
178 }
179
180 return ContentTypes.APPLICATION_OCTET_STREAM;
181 }
182
183 @Override
184 public String getExtensionContentType(String extension) {
185 if (Validator.isNull(extension)) {
186 return ContentTypes.APPLICATION_OCTET_STREAM;
187 }
188
189 return getContentType("A.".concat(extension));
190 }
191
192 @Override
193 public Set<String> getExtensions(String contentType) {
194 Set<String> extensions = _extensionsMap.get(contentType);
195
196 if (extensions == null) {
197 extensions = Collections.emptySet();
198 }
199
200 return extensions;
201 }
202
203 @Override
204 public boolean isWebImage(String mimeType) {
205 return _webImageMimeTypes.contains(mimeType);
206 }
207
208 protected void read(InputStream stream) throws Exception {
209 DocumentBuilderFactory documentBuilderFactory =
210 DocumentBuilderFactory.newInstance();
211
212 DocumentBuilder documentBuilder =
213 documentBuilderFactory.newDocumentBuilder();
214
215 Document document = documentBuilder.parse(new InputSource(stream));
216
217 Element element = document.getDocumentElement();
218
219 if ((element == null) || !MIME_INFO_TAG.equals(element.getTagName())) {
220 throw new SystemException("Invalid configuration file");
221 }
222
223 NodeList nodeList = element.getChildNodes();
224
225 for (int i = 0; i < nodeList.getLength(); i++) {
226 Node node = nodeList.item(i);
227
228 if (node.getNodeType() != Node.ELEMENT_NODE) {
229 continue;
230 }
231
232 Element childElement = (Element)node;
233
234 if (MIME_TYPE_TAG.equals(childElement.getTagName())) {
235 readMimeType(childElement);
236 }
237 }
238 }
239
240 protected void readMimeType(Element element) {
241 Set<String> mimeTypes = new HashSet<String>();
242
243 Set<String> extensions = new HashSet<String>();
244
245 String name = element.getAttribute(MIME_TYPE_TYPE_ATTR);
246
247 mimeTypes.add(name);
248
249 NodeList nodeList = element.getChildNodes();
250
251 for (int i = 0; i < nodeList.getLength(); i++) {
252 Node node = nodeList.item(i);
253
254 if (node.getNodeType() != Node.ELEMENT_NODE) {
255 continue;
256 }
257
258 Element childElement = (Element)node;
259
260 if (ALIAS_TAG.equals(childElement.getTagName())) {
261 String alias = childElement.getAttribute(ALIAS_TYPE_ATTR);
262
263 mimeTypes.add(alias);
264 }
265 else if (GLOB_TAG.equals(childElement.getTagName())) {
266 boolean isRegex = GetterUtil.getBoolean(
267 childElement.getAttribute(ISREGEX_ATTR));
268
269 if (isRegex) {
270 continue;
271 }
272
273 String pattern = childElement.getAttribute(PATTERN_ATTR);
274
275 if (!pattern.startsWith("*")) {
276 continue;
277 }
278
279 String extension = pattern.substring(1);
280
281 if (!extension.contains("*") && !extension.contains("?") &&
282 !extension.contains("[")) {
283
284 extensions.add(extension);
285 }
286 }
287 }
288
289 for (String mimeType : mimeTypes) {
290 _extensionsMap.put(mimeType, extensions);
291 }
292 }
293
294 private static Log _log = LogFactoryUtil.getLog(MimeTypesImpl.class);
295
296 private Detector _detector;
297 private Map<String, Set<String>> _extensionsMap =
298 new HashMap<String, Set<String>>();
299 private Set<String> _webImageMimeTypes = new HashSet<String>();
300
301 }