001    /**
002     * Copyright (c) 2000-2011 Liferay, Inc. All rights reserved.
003     *
004     * The contents of this file are subject to the terms of the Liferay Enterprise
005     * Subscription License ("License"). You may not use this file except in
006     * compliance with the License. You can obtain a copy of the License by
007     * contacting Liferay, Inc. See the License for the specific language governing
008     * permissions and limitations under the License, including but not limited to
009     * distribution rights of the Software.
010     *
011     *
012     *
013     */
014    
015    package com.liferay.portal.metadata;
016    
017    import com.liferay.portal.kernel.exception.SystemException;
018    import com.liferay.portal.kernel.io.DummyWriter;
019    import com.liferay.portal.kernel.log.Log;
020    import com.liferay.portal.kernel.log.LogFactoryUtil;
021    
022    import java.io.File;
023    import java.io.FileInputStream;
024    import java.io.IOException;
025    import java.io.InputStream;
026    
027    import org.apache.tika.metadata.Metadata;
028    import org.apache.tika.parser.ParseContext;
029    import org.apache.tika.parser.Parser;
030    import org.apache.tika.sax.WriteOutContentHandler;
031    
032    import org.xml.sax.ContentHandler;
033    
034    /**
035     * @author Miguel Pastor
036     * @author Alexander Chow
037     * @author Shuyang Zhou
038     */
039    public class TikaRawMetadataProcessor extends XugglerRawMetadataProcessor {
040    
041            @Override
042            public Metadata extractMetadata(
043                            String extension, String mimeType, File file)
044                    throws SystemException {
045    
046                    Metadata metadata = super.extractMetadata(
047                            extension, mimeType, file);
048    
049                    try {
050                            InputStream inputStream = new FileInputStream(file);
051    
052                            return extractMetadata(inputStream, metadata);
053                    }
054                    catch (IOException ioe) {
055                            throw new SystemException(ioe);
056                    }
057            }
058    
059            @Override
060            public Metadata extractMetadata(
061                            String extension, String mimeType, InputStream inputStream)
062                    throws SystemException {
063    
064                    Metadata metadata = super.extractMetadata(
065                            extension, mimeType, inputStream);
066    
067                    try {
068                            return extractMetadata(inputStream, metadata);
069                    }
070                    catch (IOException ioe) {
071                            throw new SystemException(ioe);
072                    }
073            }
074    
075            public void setParser(Parser parser) {
076                    _parser = parser;
077            }
078    
079            protected Metadata extractMetadata(
080                            InputStream inputStream, Metadata metadata)
081                    throws IOException {
082    
083                    if (metadata == null) {
084                            metadata = new Metadata();
085                    }
086    
087                    ParseContext parserContext = new ParseContext();
088    
089                    parserContext.set(Parser.class, _parser);
090    
091                    ContentHandler contentHandler = new WriteOutContentHandler(
092                            new DummyWriter());
093    
094                    try {
095                            _parser.parse(inputStream, contentHandler, metadata, parserContext);
096                    }
097                    catch (Exception e) {
098                            _log.error("Unable to parse", e);
099    
100                            throw new IOException(e.getMessage());
101                    }
102    
103                    // Remove potential security risks
104    
105                    metadata.remove(XMPDM.ABS_PEAK_AUDIO_FILE_PATH.getName());
106                    metadata.remove(XMPDM.RELATIVE_PEAK_AUDIO_FILE_PATH.getName());
107    
108                    return metadata;
109            }
110    
111            private static Log _log = LogFactoryUtil.getLog(
112                    TikaRawMetadataProcessor.class);
113    
114            private Parser _parser;
115    
116    }