001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import com.liferay.portal.kernel.log.Log;
018    import com.liferay.portal.kernel.log.LogFactoryUtil;
019    import com.liferay.portal.kernel.search.DocumentImpl;
020    import com.liferay.portal.kernel.search.SearchException;
021    import com.liferay.portal.kernel.search.Tokenizer;
022    
023    import java.io.IOException;
024    import java.io.Reader;
025    import java.io.StringReader;
026    
027    import java.util.ArrayList;
028    import java.util.HashMap;
029    import java.util.List;
030    import java.util.Map;
031    import java.util.regex.Pattern;
032    
033    import org.apache.lucene.analysis.Analyzer;
034    import org.apache.lucene.analysis.TokenStream;
035    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
036    import org.apache.lucene.document.Fieldable;
037    
038    /**
039     * @author Raymond Augé
040     * @author Mate Thurzo
041     */
042    public class PerFieldAnalyzer extends Analyzer implements Tokenizer {
043    
044            public PerFieldAnalyzer(
045                    Analyzer defaultAnalyzer, Map<String, Analyzer> analyzerMap) {
046    
047                    _analyzer = defaultAnalyzer;
048                    _analyzers = analyzerMap;
049            }
050    
051            public void addAnalyzer(String fieldName, Analyzer analyzer) {
052                    _analyzers.put(fieldName, analyzer);
053            }
054    
055            public Analyzer getAnalyzer(String fieldName) {
056                    Analyzer analyzer = _analyzers.get(fieldName);
057    
058                    if (analyzer != null) {
059                            return analyzer;
060                    }
061    
062                    for (String key : _analyzers.keySet()) {
063                            if (Pattern.matches(key, fieldName)) {
064                                    return _analyzers.get(key);
065                            }
066                    }
067    
068                    return _analyzer;
069            }
070    
071            @Override
072            public int getOffsetGap(Fieldable field) {
073                    Analyzer analyzer = getAnalyzer(field.name());
074    
075                    return analyzer.getOffsetGap(field);
076            }
077    
078            @Override
079            public int getPositionIncrementGap(String fieldName) {
080                    Analyzer analyzer = getAnalyzer(fieldName);
081    
082                    return analyzer.getPositionIncrementGap(fieldName);
083            }
084    
085            @Override
086            public TokenStream reusableTokenStream(String fieldName, Reader reader)
087                    throws IOException {
088    
089                    Analyzer analyzer = getAnalyzer(fieldName);
090    
091                    return analyzer.reusableTokenStream(fieldName, reader);
092            }
093    
094            @Override
095            public List<String> tokenize(
096                            String fieldName, String input, String languageId)
097                    throws SearchException {
098    
099                    List<String> tokens = new ArrayList<String>();
100                    TokenStream tokenStream = null;
101    
102                    try {
103                            String localizedFieldName = DocumentImpl.getLocalizedName(
104                                    languageId, fieldName);
105    
106                            Analyzer analyzer = getAnalyzer(localizedFieldName);
107    
108                            tokenStream = analyzer.tokenStream(
109                                    localizedFieldName, new StringReader(input));
110    
111                            CharTermAttribute charTermAttribute = tokenStream.addAttribute(
112                                    CharTermAttribute.class);
113    
114                            tokenStream.reset();
115    
116                            while (tokenStream.incrementToken()) {
117                                    tokens.add(charTermAttribute.toString());
118                            }
119    
120                            tokenStream.end();
121                    }
122                    catch (IOException ioe) {
123                            throw new SearchException(ioe);
124                    }
125                    finally {
126                            if (tokenStream != null) {
127                                    try {
128                                            tokenStream.close();
129                                    }
130                                    catch (IOException ioe) {
131                                            if (_log.isWarnEnabled()) {
132                                                    _log.warn("Unable to close token stream", ioe);
133                                            }
134                                    }
135                            }
136                    }
137    
138                    return tokens;
139            }
140    
141            @Override
142            public TokenStream tokenStream(String fieldName, Reader reader) {
143                    Analyzer analyzer = getAnalyzer(fieldName);
144    
145                    return analyzer.tokenStream(fieldName, reader);
146            }
147    
148            private static Log _log = LogFactoryUtil.getLog(PerFieldAnalyzer.class);
149    
150            private Analyzer _analyzer;
151            private Map<String, Analyzer> _analyzers = new HashMap<String, Analyzer>();
152    
153    }