001
014
015 package com.liferay.portal.metadata;
016
017 import com.liferay.portal.fabric.InputResource;
018 import com.liferay.portal.kernel.exception.SystemException;
019 import com.liferay.portal.kernel.io.DummyWriter;
020 import com.liferay.portal.kernel.log.Log;
021 import com.liferay.portal.kernel.log.LogFactoryUtil;
022 import com.liferay.portal.kernel.process.ClassPathUtil;
023 import com.liferay.portal.kernel.process.ProcessCallable;
024 import com.liferay.portal.kernel.process.ProcessChannel;
025 import com.liferay.portal.kernel.process.ProcessException;
026 import com.liferay.portal.kernel.process.ProcessExecutorUtil;
027 import com.liferay.portal.kernel.util.ArrayUtil;
028 import com.liferay.portal.kernel.util.FileUtil;
029 import com.liferay.portal.util.PropsValues;
030
031 import java.io.File;
032 import java.io.FileInputStream;
033 import java.io.IOException;
034 import java.io.InputStream;
035
036 import java.util.concurrent.Future;
037
038 import org.apache.commons.compress.archivers.zip.UnsupportedZipFeatureException;
039 import org.apache.commons.lang.exception.ExceptionUtils;
040 import org.apache.pdfbox.exceptions.CryptographyException;
041 import org.apache.poi.EncryptedDocumentException;
042 import org.apache.tika.exception.TikaException;
043 import org.apache.tika.metadata.Metadata;
044 import org.apache.tika.metadata.XMPDM;
045 import org.apache.tika.parser.ParseContext;
046 import org.apache.tika.parser.Parser;
047 import org.apache.tika.sax.WriteOutContentHandler;
048
049 import org.xml.sax.ContentHandler;
050
051
056 public class TikaRawMetadataProcessor extends XugglerRawMetadataProcessor {
057
058 public void setParser(Parser parser) {
059 _parser = parser;
060 }
061
062 protected static Metadata extractMetadata(
063 File file, Metadata metadata, Parser parser)
064 throws IOException {
065
066 if (metadata == null) {
067 metadata = new Metadata();
068 }
069
070 ParseContext parserContext = new ParseContext();
071
072 parserContext.set(Parser.class, parser);
073
074 ContentHandler contentHandler = new WriteOutContentHandler(
075 new DummyWriter());
076
077 try (InputStream inputStream = new FileInputStream(file)) {
078 parser.parse(inputStream, contentHandler, metadata, parserContext);
079 }
080 catch (Exception e) {
081 Throwable throwable = ExceptionUtils.getRootCause(e);
082
083 if ((throwable instanceof CryptographyException) ||
084 (throwable instanceof EncryptedDocumentException) ||
085 (throwable instanceof UnsupportedZipFeatureException)) {
086
087 if (_log.isWarnEnabled()) {
088 _log.warn(
089 "Unable to extract metadata from an encrypted file");
090 }
091 }
092 else if (e instanceof TikaException) {
093 if (_log.isWarnEnabled()) {
094 _log.warn("Unable to extract metadata");
095 }
096 }
097 else {
098 _log.error(e, e);
099 }
100
101 throw new IOException(e);
102 }
103
104
105
106 metadata.remove(XMPDM.ABS_PEAK_AUDIO_FILE_PATH.getName());
107 metadata.remove(XMPDM.RELATIVE_PEAK_AUDIO_FILE_PATH.getName());
108
109 return metadata;
110 }
111
112 @Override
113 protected Metadata extractMetadata(
114 String extension, String mimeType, File file) {
115
116 Metadata metadata = super.extractMetadata(extension, mimeType, file);
117
118 boolean forkProcess = false;
119
120 if (PropsValues.TEXT_EXTRACTION_FORK_PROCESS_ENABLED) {
121 if (ArrayUtil.contains(
122 PropsValues.TEXT_EXTRACTION_FORK_PROCESS_MIME_TYPES,
123 mimeType)) {
124
125 forkProcess = true;
126 }
127 }
128
129 if (forkProcess) {
130 ExtractMetadataProcessCallable extractMetadataProcessCallable =
131 new ExtractMetadataProcessCallable(file, metadata, _parser);
132
133 try {
134 ProcessChannel<Metadata> processChannel =
135 ProcessExecutorUtil.execute(
136 ClassPathUtil.getPortalProcessConfig(),
137 extractMetadataProcessCallable);
138
139 Future<Metadata> future =
140 processChannel.getProcessNoticeableFuture();
141
142 return future.get();
143 }
144 catch (Exception e) {
145 throw new SystemException(e);
146 }
147 }
148
149 try {
150 return extractMetadata(file, metadata, _parser);
151 }
152 catch (IOException ioe) {
153 throw new SystemException(ioe);
154 }
155 }
156
157 @Override
158 protected Metadata extractMetadata(
159 String extension, String mimeType, InputStream inputStream) {
160
161 File file = FileUtil.createTempFile();
162
163 try {
164 FileUtil.write(file, inputStream);
165
166 return extractMetadata(extension, mimeType, file);
167 }
168 catch (Exception e) {
169 throw new SystemException(e);
170 }
171 finally {
172 file.delete();
173 }
174 }
175
176 private static final Log _log = LogFactoryUtil.getLog(
177 TikaRawMetadataProcessor.class);
178
179 private Parser _parser;
180
181 private static class ExtractMetadataProcessCallable
182 implements ProcessCallable<Metadata> {
183
184 public ExtractMetadataProcessCallable(
185 File file, Metadata metadata, Parser parser) {
186
187 _file = file;
188 _metadata = metadata;
189 _parser = parser;
190 }
191
192 @Override
193 public Metadata call() throws ProcessException {
194 try {
195 return extractMetadata(_file, _metadata, _parser);
196 }
197 catch (IOException ioe) {
198 throw new ProcessException(ioe);
199 }
200 }
201
202 private static final long serialVersionUID = 1L;
203
204 @InputResource
205 private final File _file;
206
207 private final Metadata _metadata;
208 private final Parser _parser;
209
210 }
211
212 }