001    /**
002     * Copyright (c) 2000-2012 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.metadata;
016    
017    import com.liferay.portal.kernel.exception.SystemException;
018    import com.liferay.portal.kernel.io.DummyWriter;
019    import com.liferay.portal.kernel.log.Log;
020    import com.liferay.portal.kernel.log.LogFactoryUtil;
021    import com.liferay.portal.kernel.util.StreamUtil;
022    
023    import java.io.File;
024    import java.io.FileInputStream;
025    import java.io.IOException;
026    import java.io.InputStream;
027    
028    import org.apache.tika.metadata.Metadata;
029    import org.apache.tika.parser.ParseContext;
030    import org.apache.tika.parser.Parser;
031    import org.apache.tika.sax.WriteOutContentHandler;
032    
033    import org.xml.sax.ContentHandler;
034    
035    /**
036     * @author Miguel Pastor
037     * @author Alexander Chow
038     * @author Shuyang Zhou
039     */
040    public class TikaRawMetadataProcessor extends XugglerRawMetadataProcessor {
041    
042            public void setParser(Parser parser) {
043                    _parser = parser;
044            }
045    
046            protected Metadata extractMetadata(
047                            InputStream inputStream, Metadata metadata)
048                    throws IOException {
049    
050                    if (metadata == null) {
051                            metadata = new Metadata();
052                    }
053    
054                    ParseContext parserContext = new ParseContext();
055    
056                    parserContext.set(Parser.class, _parser);
057    
058                    ContentHandler contentHandler = new WriteOutContentHandler(
059                            new DummyWriter());
060    
061                    try {
062                            _parser.parse(inputStream, contentHandler, metadata, parserContext);
063                    }
064                    catch (Exception e) {
065                            _log.error("Unable to parse", e);
066    
067                            throw new IOException(e.getMessage());
068                    }
069    
070                    // Remove potential security risks
071    
072                    metadata.remove(XMPDM.ABS_PEAK_AUDIO_FILE_PATH.getName());
073                    metadata.remove(XMPDM.RELATIVE_PEAK_AUDIO_FILE_PATH.getName());
074    
075                    return metadata;
076            }
077    
078            @Override
079            protected Metadata extractMetadata(
080                            String extension, String mimeType, File file)
081                    throws SystemException {
082    
083                    Metadata metadata = super.extractMetadata(extension, mimeType, file);
084    
085                    InputStream inputStream = null;
086    
087                    try {
088                            inputStream = new FileInputStream(file);
089    
090                            return extractMetadata(inputStream, metadata);
091                    }
092                    catch (IOException ioe) {
093                            throw new SystemException(ioe);
094                    }
095                    finally {
096                            StreamUtil.cleanUp(inputStream);
097                    }
098            }
099    
100            @Override
101            protected Metadata extractMetadata(
102                            String extension, String mimeType, InputStream inputStream)
103                    throws SystemException {
104    
105                    Metadata metadata = super.extractMetadata(
106                            extension, mimeType, inputStream);
107    
108                    try {
109                            return extractMetadata(inputStream, metadata);
110                    }
111                    catch (IOException ioe) {
112                            throw new SystemException(ioe);
113                    }
114            }
115    
116            private static Log _log = LogFactoryUtil.getLog(
117                    TikaRawMetadataProcessor.class);
118    
119            private Parser _parser;
120    
121    }