001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.liferay.portal.kernel.log.Log;
018 import com.liferay.portal.kernel.log.LogFactoryUtil;
019 import com.liferay.portal.kernel.search.DocumentImpl;
020 import com.liferay.portal.kernel.search.SearchException;
021 import com.liferay.portal.kernel.search.Tokenizer;
022 import com.liferay.portal.kernel.util.ObjectValuePair;
023
024 import java.io.IOException;
025 import java.io.Reader;
026 import java.io.StringReader;
027
028 import java.util.ArrayList;
029 import java.util.LinkedHashMap;
030 import java.util.List;
031 import java.util.Map;
032 import java.util.regex.Matcher;
033 import java.util.regex.Pattern;
034
035 import org.apache.lucene.analysis.Analyzer;
036 import org.apache.lucene.analysis.TokenStream;
037 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
038 import org.apache.lucene.document.Fieldable;
039
040
044 public class PerFieldAnalyzer extends Analyzer implements Tokenizer {
045
046 public PerFieldAnalyzer(
047 Analyzer defaultAnalyzer, Map<String, Analyzer> analyzerMap) {
048
049 _analyzer = defaultAnalyzer;
050
051 for (Map.Entry<String, Analyzer> entry : analyzerMap.entrySet()) {
052 addAnalyzer(entry.getKey(), entry.getValue());
053 }
054 }
055
056 public void addAnalyzer(String fieldName, Analyzer analyzer) {
057 _analyzers.put(
058 fieldName,
059 new ObjectValuePair<Pattern, Analyzer>(
060 Pattern.compile(fieldName), analyzer));
061 }
062
063 public Analyzer getAnalyzer(String fieldName) {
064 ObjectValuePair<Pattern, Analyzer> objectValuePair = _analyzers.get(
065 fieldName);
066
067 if (objectValuePair != null) {
068 return objectValuePair.getValue();
069 }
070
071 for (ObjectValuePair<Pattern, Analyzer> curObjectValuePair :
072 _analyzers.values()) {
073
074 Pattern pattern = curObjectValuePair.getKey();
075
076 Matcher matcher = pattern.matcher(fieldName);
077
078 if (matcher.matches()) {
079 return curObjectValuePair.getValue();
080 }
081 }
082
083 return _analyzer;
084 }
085
086 @Override
087 public int getOffsetGap(Fieldable field) {
088 Analyzer analyzer = getAnalyzer(field.name());
089
090 return analyzer.getOffsetGap(field);
091 }
092
093 @Override
094 public int getPositionIncrementGap(String fieldName) {
095 Analyzer analyzer = getAnalyzer(fieldName);
096
097 return analyzer.getPositionIncrementGap(fieldName);
098 }
099
100 @Override
101 public final TokenStream reusableTokenStream(
102 String fieldName, Reader reader)
103 throws IOException {
104
105 Analyzer analyzer = getAnalyzer(fieldName);
106
107 return analyzer.reusableTokenStream(fieldName, reader);
108 }
109
110 @Override
111 public List<String> tokenize(
112 String fieldName, String input, String languageId)
113 throws SearchException {
114
115 List<String> tokens = new ArrayList<String>();
116 TokenStream tokenStream = null;
117
118 try {
119 String localizedFieldName = DocumentImpl.getLocalizedName(
120 languageId, fieldName);
121
122 Analyzer analyzer = getAnalyzer(localizedFieldName);
123
124 tokenStream = analyzer.tokenStream(
125 localizedFieldName, new StringReader(input));
126
127 CharTermAttribute charTermAttribute = tokenStream.addAttribute(
128 CharTermAttribute.class);
129
130 tokenStream.reset();
131
132 while (tokenStream.incrementToken()) {
133 tokens.add(charTermAttribute.toString());
134 }
135
136 tokenStream.end();
137 }
138 catch (IOException ioe) {
139 throw new SearchException(ioe);
140 }
141 finally {
142 if (tokenStream != null) {
143 try {
144 tokenStream.close();
145 }
146 catch (IOException ioe) {
147 if (_log.isWarnEnabled()) {
148 _log.warn("Unable to close token stream", ioe);
149 }
150 }
151 }
152 }
153
154 return tokens;
155 }
156
157 @Override
158 public final TokenStream tokenStream(String fieldName, Reader reader) {
159 Analyzer analyzer = getAnalyzer(fieldName);
160
161 return analyzer.tokenStream(fieldName, reader);
162 }
163
164 private static final Log _log = LogFactoryUtil.getLog(
165 PerFieldAnalyzer.class);
166
167 private final Analyzer _analyzer;
168 private final Map<String, ObjectValuePair<Pattern, Analyzer>> _analyzers =
169 new LinkedHashMap<String, ObjectValuePair<Pattern, Analyzer>>();
170
171 }