001
014
015 package com.liferay.portal.metadata;
016
017 import com.liferay.portal.kernel.exception.SystemException;
018 import com.liferay.portal.kernel.io.DummyWriter;
019 import com.liferay.portal.kernel.log.Log;
020 import com.liferay.portal.kernel.log.LogFactoryUtil;
021
022 import java.io.File;
023 import java.io.FileInputStream;
024 import java.io.IOException;
025 import java.io.InputStream;
026
027 import org.apache.tika.metadata.Metadata;
028 import org.apache.tika.parser.ParseContext;
029 import org.apache.tika.parser.Parser;
030 import org.apache.tika.sax.WriteOutContentHandler;
031
032 import org.xml.sax.ContentHandler;
033
034
039 public class TikaRawMetadataProcessor extends XugglerRawMetadataProcessor {
040
041 @Override
042 public Metadata extractMetadata(
043 String extension, String mimeType, File file)
044 throws SystemException {
045
046 Metadata metadata = super.extractMetadata(
047 extension, mimeType, file);
048
049 try {
050 InputStream inputStream = new FileInputStream(file);
051
052 return extractMetadata(inputStream, metadata);
053 }
054 catch (IOException ioe) {
055 throw new SystemException(ioe);
056 }
057 }
058
059 @Override
060 public Metadata extractMetadata(
061 String extension, String mimeType, InputStream inputStream)
062 throws SystemException {
063
064 Metadata metadata = super.extractMetadata(
065 extension, mimeType, inputStream);
066
067 try {
068 return extractMetadata(inputStream, metadata);
069 }
070 catch (IOException ioe) {
071 throw new SystemException(ioe);
072 }
073 }
074
075 public void setParser(Parser parser) {
076 _parser = parser;
077 }
078
079 protected Metadata extractMetadata(
080 InputStream inputStream, Metadata metadata)
081 throws IOException {
082
083 if (metadata == null) {
084 metadata = new Metadata();
085 }
086
087 ParseContext parserContext = new ParseContext();
088
089 parserContext.set(Parser.class, _parser);
090
091 ContentHandler contentHandler = new WriteOutContentHandler(
092 new DummyWriter());
093
094 try {
095 _parser.parse(inputStream, contentHandler, metadata, parserContext);
096 }
097 catch (Exception e) {
098 _log.error("Unable to parse", e);
099
100 throw new IOException(e.getMessage());
101 }
102
103
104
105 metadata.remove(XMPDM.ABS_PEAK_AUDIO_FILE_PATH.getName());
106 metadata.remove(XMPDM.RELATIVE_PEAK_AUDIO_FILE_PATH.getName());
107
108 return metadata;
109 }
110
111 private static Log _log = LogFactoryUtil.getLog(
112 TikaRawMetadataProcessor.class);
113
114 private Parser _parser;
115
116 }