package com.liferay.portal.search.lucene;

import com.liferay.portal.kernel.log.Log;
import com.liferay.portal.kernel.log.LogFactoryUtil;
import com.liferay.portal.kernel.search.DocumentImpl;
import com.liferay.portal.kernel.search.SearchException;
import com.liferay.portal.kernel.search.Tokenizer;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Fieldable;
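
/**
 * Analyzer that picks a delegate {@link Analyzer} per field. An exact match
 * on the field name wins, otherwise each configured key is read as a regular
 * expression and matched against the field name, and the default analyzer is
 * the final fallback.
 *
 * <p>
 * A minimal usage sketch (the analyzers and field names below are
 * illustrative assumptions, not part of this class):
 * </p>
 *
 * <pre>
 * Map&lt;String, Analyzer&gt; analyzers = new HashMap&lt;String, Analyzer&gt;();
 *
 * analyzers.put("entryClassPK", new KeywordAnalyzer());
 * analyzers.put("title_.*", new WhitespaceAnalyzer());
 *
 * Analyzer analyzer = new PerFieldAnalyzer(new SimpleAnalyzer(), analyzers);
 * </pre>
 */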
public class PerFieldAnalyzer extends Analyzer implements Tokenizer {

	public PerFieldAnalyzer(
		Analyzer defaultAnalyzer, Map<String, Analyzer> analyzerMap) {

		_analyzer = defaultAnalyzer;
		_analyzers = analyzerMap;
	}

	public void addAnalyzer(String fieldName, Analyzer analyzer) {
		_analyzers.put(fieldName, analyzer);
	}

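	/**
	 * Returns the analyzer for the given field: an exact match in the
	 * configured map wins, then any map key that, read as a regular
	 * expression, matches the field name, and finally the default analyzer.
	 */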
	public Analyzer getAnalyzer(String fieldName) {
		Analyzer analyzer = _analyzers.get(fieldName);

		if (analyzer != null) {
			return analyzer;
		}

		for (Map.Entry<String, Analyzer> entry : _analyzers.entrySet()) {
			if (Pattern.matches(entry.getKey(), fieldName)) {
				return entry.getValue();
			}
		}

		return _analyzer;
	}

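	// The gap and token stream methods below delegate to the analyzer
	// resolved for the field, so position increments, offsets, and
	// tokenization all follow the matched analyzer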
	@Override
	public int getOffsetGap(Fieldable field) {
		Analyzer analyzer = getAnalyzer(field.name());

		return analyzer.getOffsetGap(field);
	}

	@Override
	public int getPositionIncrementGap(String fieldName) {
		Analyzer analyzer = getAnalyzer(fieldName);

		return analyzer.getPositionIncrementGap(fieldName);
	}

	@Override
	public TokenStream reusableTokenStream(String fieldName, Reader reader)
		throws IOException {

		Analyzer analyzer = getAnalyzer(fieldName);

		return analyzer.reusableTokenStream(fieldName, reader);
	}

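	/**
	 * Tokenizes the input with the analyzer resolved for the localized field
	 * name (for example "title_en_US") and returns the resulting terms.
	 */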
	@Override
	public List<String> tokenize(
			String fieldName, String input, String languageId)
		throws SearchException {

		List<String> tokens = new ArrayList<String>();
		TokenStream tokenStream = null;

		try {
			String localizedFieldName = DocumentImpl.getLocalizedName(
				languageId, fieldName);

			Analyzer analyzer = getAnalyzer(localizedFieldName);

			tokenStream = analyzer.tokenStream(
				localizedFieldName, new StringReader(input));

			CharTermAttribute charTermAttribute = tokenStream.addAttribute(
				CharTermAttribute.class);

			// Consume the stream, collecting the text of each term

			tokenStream.reset();

			while (tokenStream.incrementToken()) {
				tokens.add(charTermAttribute.toString());
			}

			tokenStream.end();
		}
		catch (IOException ioe) {
			throw new SearchException(ioe);
		}
		finally {
			if (tokenStream != null) {
				try {
					tokenStream.close();
				}
				catch (IOException ioe) {
					if (_log.isWarnEnabled()) {
						_log.warn("Unable to close token stream", ioe);
					}
				}
			}
		}

		return tokens;
	}

	@Override
	public TokenStream tokenStream(String fieldName, Reader reader) {
		Analyzer analyzer = getAnalyzer(fieldName);

		return analyzer.tokenStream(fieldName, reader);
	}

	private static Log _log = LogFactoryUtil.getLog(PerFieldAnalyzer.class);

	private Analyzer _analyzer;
	private Map<String, Analyzer> _analyzers = new HashMap<String, Analyzer>();

}