001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.liferay.portal.kernel.io.unsync.UnsyncStringReader;
018 import com.liferay.portal.kernel.log.Log;
019 import com.liferay.portal.kernel.log.LogFactoryUtil;
020 import com.liferay.portal.kernel.search.Field;
021 import com.liferay.portal.kernel.util.PropsKeys;
022 import com.liferay.portal.kernel.util.StringPool;
023 import com.liferay.portal.kernel.util.StringUtil;
024 import com.liferay.portal.kernel.util.Validator;
025 import com.liferay.portal.util.PropsUtil;
026 import com.liferay.util.lucene.KeywordsUtil;
027
028 import java.io.IOException;
029
030 import java.util.HashSet;
031 import java.util.Map;
032 import java.util.Set;
033 import java.util.concurrent.ConcurrentHashMap;
034
035 import org.apache.lucene.analysis.Analyzer;
036 import org.apache.lucene.analysis.TokenStream;
037 import org.apache.lucene.analysis.WhitespaceAnalyzer;
038 import org.apache.lucene.document.Document;
039 import org.apache.lucene.index.Term;
040 import org.apache.lucene.queryParser.ParseException;
041 import org.apache.lucene.queryParser.QueryParser;
042 import org.apache.lucene.search.BooleanClause;
043 import org.apache.lucene.search.BooleanQuery;
044 import org.apache.lucene.search.IndexSearcher;
045 import org.apache.lucene.search.Query;
046 import org.apache.lucene.search.TermQuery;
047 import org.apache.lucene.search.WildcardQuery;
048 import org.apache.lucene.search.highlight.Highlighter;
049 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
050 import org.apache.lucene.search.highlight.QueryScorer;
051 import org.apache.lucene.search.highlight.QueryTermExtractor;
052 import org.apache.lucene.search.highlight.SimpleFragmenter;
053 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
054 import org.apache.lucene.search.highlight.WeightedTerm;
055
056
061 public class LuceneHelperImpl implements LuceneHelper {
062
063 public void addDocument(long companyId, Document document)
064 throws IOException {
065
066 IndexAccessor indexAccessor = _getIndexAccessor(companyId);
067
068 indexAccessor.addDocument(document);
069 }
070
071 public void addExactTerm(
072 BooleanQuery booleanQuery, String field, String value) {
073
074
075
076 Query query = new TermQuery(new Term(field, value));
077
078 booleanQuery.add(query, BooleanClause.Occur.SHOULD);
079 }
080
081 public void addRequiredTerm(
082 BooleanQuery booleanQuery, String field, String value, boolean like) {
083
084 if (like) {
085 value = StringUtil.replace(
086 value, StringPool.PERCENT, StringPool.STAR);
087
088 value = value.toLowerCase();
089
090 WildcardQuery wildcardQuery = new WildcardQuery(
091 new Term(field, value));
092
093 booleanQuery.add(wildcardQuery, BooleanClause.Occur.MUST);
094 }
095 else {
096
097
098 Term term = new Term(field, value);
099 TermQuery termQuery = new TermQuery(term);
100
101 booleanQuery.add(termQuery, BooleanClause.Occur.MUST);
102 }
103 }
104
105 public void addTerm(
106 BooleanQuery booleanQuery, String field, String value, boolean like)
107 throws ParseException {
108
109 if (Validator.isNull(value)) {
110 return;
111 }
112
113 if (like) {
114 value = StringUtil.replace(
115 value, StringPool.PERCENT, StringPool.BLANK);
116
117 value = value.toLowerCase();
118
119 Term term = new Term(
120 field, StringPool.STAR.concat(value).concat(StringPool.STAR));
121
122 WildcardQuery wildcardQuery = new WildcardQuery(term);
123
124 booleanQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD);
125 }
126 else {
127 QueryParser queryParser = new QueryParser(field, getAnalyzer());
128
129 try {
130 Query query = queryParser.parse(value);
131
132 booleanQuery.add(query, BooleanClause.Occur.SHOULD);
133 }
134 catch (ParseException pe) {
135 if (_log.isDebugEnabled()) {
136 _log.debug(
137 "ParseException thrown, reverting to literal search",
138 pe);
139 }
140
141 value = KeywordsUtil.escape(value);
142
143 Query query = queryParser.parse(value);
144
145 booleanQuery.add(query, BooleanClause.Occur.SHOULD);
146 }
147 }
148 }
149
150 public void delete(long companyId) {
151 IndexAccessor indexAccessor = _getIndexAccessor(companyId);
152
153 indexAccessor.delete();
154 }
155
156 public void deleteDocuments(long companyId, Term term) throws IOException {
157 IndexAccessor indexAccessor = _getIndexAccessor(companyId);
158
159 indexAccessor.deleteDocuments(term);
160 }
161
162 public Analyzer getAnalyzer() {
163 try {
164 return (Analyzer)_analyzerClass.newInstance();
165 }
166 catch (Exception e) {
167 throw new RuntimeException(e);
168 }
169 }
170
171 public String[] getQueryTerms(Query query) {
172 String[] fieldNames = new String[] {
173 Field.CONTENT, Field.DESCRIPTION, Field.PROPERTIES, Field.TITLE,
174 Field.USER_NAME
175 };
176
177 WeightedTerm[] weightedTerms = null;
178
179 for (String fieldName : fieldNames) {
180 weightedTerms = QueryTermExtractor.getTerms(
181 query, false, fieldName);
182
183 if (weightedTerms.length > 0) {
184 break;
185 }
186 }
187
188 Set<String> queryTerms = new HashSet<String>();
189
190 for (WeightedTerm weightedTerm : weightedTerms) {
191 queryTerms.add(weightedTerm.getTerm());
192 }
193
194 return queryTerms.toArray(new String[queryTerms.size()]);
195 }
196
197 public IndexSearcher getSearcher(long companyId, boolean readOnly)
198 throws IOException {
199
200 IndexAccessor indexAccessor = _getIndexAccessor(companyId);
201
202 return new IndexSearcher(indexAccessor.getLuceneDir(), readOnly);
203 }
204
205 public String getSnippet(
206 Query query, String field, String s, int maxNumFragments,
207 int fragmentLength, String fragmentSuffix, String preTag,
208 String postTag)
209 throws IOException {
210
211 SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
212 preTag, postTag);
213
214 QueryScorer queryScorer = new QueryScorer(query, field);
215
216 Highlighter highlighter = new Highlighter(
217 simpleHTMLFormatter, queryScorer);
218
219 highlighter.setTextFragmenter(new SimpleFragmenter(fragmentLength));
220
221 TokenStream tokenStream = getAnalyzer().tokenStream(
222 field, new UnsyncStringReader(s));
223
224 try {
225 String snippet = highlighter.getBestFragments(
226 tokenStream, s, maxNumFragments, fragmentSuffix);
227
228 if (Validator.isNotNull(snippet) &&
229 !StringUtil.endsWith(snippet, fragmentSuffix)) {
230
231 snippet = snippet + fragmentSuffix;
232 }
233
234 return snippet;
235 }
236 catch (InvalidTokenOffsetsException itoe) {
237 throw new IOException(itoe.getMessage());
238 }
239 }
240
241 public void updateDocument(long companyId, Term term, Document document)
242 throws IOException {
243
244 IndexAccessor indexAccessor = _getIndexAccessor(companyId);
245
246 indexAccessor.updateDocument(term, document);
247 }
248
249 public void shutdown() {
250 for (IndexAccessor indexAccessor : _indexAccessorMap.values()) {
251 indexAccessor.close();
252 }
253 }
254
255 private LuceneHelperImpl() {
256 String analyzerName = PropsUtil.get(PropsKeys.LUCENE_ANALYZER);
257
258 if (Validator.isNotNull(analyzerName)) {
259 try {
260 _analyzerClass = Class.forName(analyzerName);
261 }
262 catch (Exception e) {
263 _log.error(e);
264 }
265 }
266 }
267
268 private IndexAccessor _getIndexAccessor(long companyId) {
269 IndexAccessor indexAccessor = _indexAccessorMap.get(companyId);
270
271 if (indexAccessor == null) {
272 synchronized (this) {
273 indexAccessor = _indexAccessorMap.get(companyId);
274
275 if (indexAccessor == null) {
276 indexAccessor = new IndexAccessorImpl(companyId);
277
278 _indexAccessorMap.put(companyId, indexAccessor);
279 }
280 }
281 }
282
283 return indexAccessor;
284 }
285
286 private static Log _log = LogFactoryUtil.getLog(LuceneHelperImpl.class);
287
288 private Class<?> _analyzerClass = WhitespaceAnalyzer.class;
289 private Map<Long, IndexAccessor> _indexAccessorMap =
290 new ConcurrentHashMap<Long, IndexAccessor>();
291
292 }