/**
 * Copyright (c) 2000-present Liferay, Inc. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 2.1 of the License, or (at your option)
 * any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 */

package com.liferay.portal.search.lucene;

import com.liferay.portal.kernel.log.Log;
import com.liferay.portal.kernel.log.LogFactoryUtil;
import com.liferay.portal.kernel.search.DocumentImpl;
import com.liferay.portal.kernel.search.SearchException;
import com.liferay.portal.kernel.search.Tokenizer;
import com.liferay.portal.kernel.util.ObjectValuePair;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Fieldable;

/**
 * An {@link Analyzer} that delegates to a per-field analyzer, chosen by exact
 * field name or by regular expression, falling back to a default analyzer
 * when no mapping matches.
 *
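 * <p>
 * A minimal usage sketch follows; the analyzer choices are illustrative, not
 * the portal's actual configuration ({@code KeywordAnalyzer},
 * {@code StandardAnalyzer}, and {@code Version.LUCENE_35} are stock Lucene
 * classes used here only as examples):
 * </p>
 *
 * <pre>
 * Map&lt;String, Analyzer&gt; analyzerMap = new HashMap&lt;String, Analyzer&gt;();
 *
 * analyzerMap.put("title_.*", new KeywordAnalyzer());
 *
 * PerFieldAnalyzer perFieldAnalyzer = new PerFieldAnalyzer(
 *     new StandardAnalyzer(Version.LUCENE_35), analyzerMap);
 *
 * // "title_en_US" matches the "title_.*" pattern, so the KeywordAnalyzer is
 * // returned; any other field falls back to the StandardAnalyzer
 * Analyzer analyzer = perFieldAnalyzer.getAnalyzer("title_en_US");
 * </pre>
 *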
 * @author Raymond Augé
 * @author Mate Thurzo
 */
public class PerFieldAnalyzer extends Analyzer implements Tokenizer {

	public PerFieldAnalyzer(
		Analyzer defaultAnalyzer, Map<String, Analyzer> analyzerMap) {

		_analyzer = defaultAnalyzer;

		for (Map.Entry<String, Analyzer> entry : analyzerMap.entrySet()) {
			addAnalyzer(entry.getKey(), entry.getValue());
		}
	}

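	/**
	 * Registers an analyzer for a field name. The name is also compiled as a
	 * regular expression, so a single entry can match a whole family of
	 * fields, such as every localized variant of a field.
	 */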
	public void addAnalyzer(String fieldName, Analyzer analyzer) {
		_analyzers.put(
			fieldName,
			new ObjectValuePair<Pattern, Analyzer>(
				Pattern.compile(fieldName), analyzer));
	}

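	/**
	 * Returns the analyzer for the field name, trying an exact match first,
	 * then each registered pattern in insertion order, and finally falling
	 * back to the default analyzer.
	 */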
	public Analyzer getAnalyzer(String fieldName) {
		ObjectValuePair<Pattern, Analyzer> objectValuePair = _analyzers.get(
			fieldName);

		if (objectValuePair != null) {
			return objectValuePair.getValue();
		}

		for (ObjectValuePair<Pattern, Analyzer> curObjectValuePair :
				_analyzers.values()) {

			Pattern pattern = curObjectValuePair.getKey();

			Matcher matcher = pattern.matcher(fieldName);

			if (matcher.matches()) {
				return curObjectValuePair.getValue();
			}
		}

		return _analyzer;
	}

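	/**
	 * Delegates to the analyzer registered for the field, keeping the offset
	 * gap consistent with the analyzer that actually tokenizes the field.
	 */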
	@Override
	public int getOffsetGap(Fieldable field) {
		Analyzer analyzer = getAnalyzer(field.name());

		return analyzer.getOffsetGap(field);
	}

	@Override
	public int getPositionIncrementGap(String fieldName) {
		Analyzer analyzer = getAnalyzer(fieldName);

		return analyzer.getPositionIncrementGap(fieldName);
	}

	@Override
	public final TokenStream reusableTokenStream(
			String fieldName, Reader reader)
		throws IOException {

		Analyzer analyzer = getAnalyzer(fieldName);

		return analyzer.reusableTokenStream(fieldName, reader);
	}

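	/**
	 * Tokenizes the input with the analyzer registered for the localized
	 * field name (for example, <code>title_en_US</code> for field
	 * <code>title</code> and language <code>en_US</code>) and returns the
	 * terms that analyzer produces.
	 */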
	@Override
	public List<String> tokenize(
			String fieldName, String input, String languageId)
		throws SearchException {

		List<String> tokens = new ArrayList<String>();
		TokenStream tokenStream = null;

		try {
			String localizedFieldName = DocumentImpl.getLocalizedName(
				languageId, fieldName);

			Analyzer analyzer = getAnalyzer(localizedFieldName);

			tokenStream = analyzer.tokenStream(
				localizedFieldName, new StringReader(input));

			CharTermAttribute charTermAttribute = tokenStream.addAttribute(
				CharTermAttribute.class);

			tokenStream.reset();

			while (tokenStream.incrementToken()) {
				tokens.add(charTermAttribute.toString());
			}

			tokenStream.end();
		}
		catch (IOException ioe) {
			throw new SearchException(ioe);
		}
		finally {
			if (tokenStream != null) {
				try {
					tokenStream.close();
				}
				catch (IOException ioe) {
					if (_log.isWarnEnabled()) {
						_log.warn("Unable to close token stream", ioe);
					}
				}
			}
		}

		return tokens;
	}

	@Override
	public final TokenStream tokenStream(String fieldName, Reader reader) {
		Analyzer analyzer = getAnalyzer(fieldName);

		return analyzer.tokenStream(fieldName, reader);
	}

	private static final Log _log = LogFactoryUtil.getLog(
		PerFieldAnalyzer.class);

	private final Analyzer _analyzer;
	private final Map<String, ObjectValuePair<Pattern, Analyzer>> _analyzers =
		new LinkedHashMap<String, ObjectValuePair<Pattern, Analyzer>>();

}