001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.liferay.portal.kernel.search.CollatorUtil;
018 import com.liferay.portal.kernel.search.DocumentImpl;
019 import com.liferay.portal.kernel.search.Field;
020 import com.liferay.portal.kernel.search.NGramHolder;
021 import com.liferay.portal.kernel.search.NGramHolderBuilderUtil;
022 import com.liferay.portal.kernel.search.QuerySuggester;
023 import com.liferay.portal.kernel.search.SearchContext;
024 import com.liferay.portal.kernel.search.SearchException;
025 import com.liferay.portal.kernel.search.TokenizerUtil;
026 import com.liferay.portal.util.PortletKeys;
027 import com.liferay.util.lucene.KeywordsUtil;
028
029 import java.io.IOException;
030
031 import java.util.ArrayList;
032 import java.util.Arrays;
033 import java.util.Collections;
034 import java.util.Comparator;
035 import java.util.LinkedHashMap;
036 import java.util.List;
037 import java.util.Map;
038
039 import org.apache.lucene.document.Document;
040 import org.apache.lucene.document.Fieldable;
041 import org.apache.lucene.index.IndexReader;
042 import org.apache.lucene.index.Term;
043 import org.apache.lucene.queryParser.ParseException;
044 import org.apache.lucene.queryParser.QueryParser;
045 import org.apache.lucene.search.BooleanClause;
046 import org.apache.lucene.search.BooleanQuery;
047 import org.apache.lucene.search.IndexSearcher;
048 import org.apache.lucene.search.Query;
049 import org.apache.lucene.search.ScoreDoc;
050 import org.apache.lucene.search.TermQuery;
051 import org.apache.lucene.search.TopDocs;
052 import org.apache.lucene.search.spell.StringDistance;
053 import org.apache.lucene.search.spell.SuggestWord;
054 import org.apache.lucene.search.spell.SuggestWordQueue;
055 import org.apache.lucene.util.ReaderUtil;
056
057
060 public class LuceneQuerySuggester implements QuerySuggester {
061
062 public void setBoostEnd(float boostEnd) {
063 _boostEnd = boostEnd;
064 }
065
066 public void setBoostStart(float boostStart) {
067 _boostStart = boostStart;
068 }
069
070 public void setStringDistance(StringDistance stringDistance) {
071 _stringDistance = stringDistance;
072 }
073
074 public void setSuggestWordComparator(
075 Comparator<SuggestWord> suggestWordComparator) {
076
077 _suggestWordComparator = suggestWordComparator;
078 }
079
080 @Override
081 public String spellCheckKeywords(SearchContext searchContext)
082 throws SearchException {
083
084 String languageId = searchContext.getLanguageId();
085
086 String localizedFieldName = DocumentImpl.getLocalizedName(
087 languageId, Field.SPELL_CHECK_WORD);
088
089 List<String> keywords = TokenizerUtil.tokenize(
090 localizedFieldName, searchContext.getKeywords(), languageId);
091
092 Map<String, List<String>> suggestions = spellCheckKeywords(
093 keywords, localizedFieldName, searchContext, languageId, 1);
094
095 return CollatorUtil.collate(suggestions, keywords);
096 }
097
098 @Override
099 public Map<String, List<String>> spellCheckKeywords(
100 SearchContext searchContext, int max)
101 throws SearchException {
102
103 String languageId = searchContext.getLanguageId();
104
105 String localizedFieldName = DocumentImpl.getLocalizedName(
106 languageId, Field.SPELL_CHECK_WORD);
107
108 List<String> keywords = TokenizerUtil.tokenize(
109 localizedFieldName, searchContext.getKeywords(), languageId);
110
111 return spellCheckKeywords(
112 keywords, localizedFieldName, searchContext, languageId, max);
113 }
114
115 @Override
116 public String[] suggestKeywordQueries(SearchContext searchContext, int max)
117 throws SearchException {
118
119 IndexSearcher indexSearcher = null;
120
121 try {
122 indexSearcher = LuceneHelperUtil.getSearcher(
123 searchContext.getCompanyId(), true);
124
125 BooleanQuery suggestKeywordQuery = new BooleanQuery();
126
127 addTermQuery(
128 suggestKeywordQuery, Field.COMPANY_ID,
129 String.valueOf(searchContext.getCompanyId()), null,
130 BooleanClause.Occur.MUST);
131
132 String localizedKeywordFieldName = DocumentImpl.getLocalizedName(
133 searchContext.getLanguageId(), Field.KEYWORD_SEARCH);
134
135 QueryParser queryParser = new QueryParser(
136 LuceneHelperUtil.getVersion(), localizedKeywordFieldName,
137 LuceneHelperUtil.getAnalyzer());
138
139 Query query = null;
140
141 try {
142 query = queryParser.parse(searchContext.getKeywords());
143 }
144 catch (ParseException e) {
145 query = queryParser.parse(
146 KeywordsUtil.escape(searchContext.getKeywords()));
147 }
148
149 BooleanClause keywordTermQuery = new BooleanClause(
150 query, BooleanClause.Occur.MUST);
151
152 suggestKeywordQuery.add(keywordTermQuery);
153
154 String languageId = searchContext.getLanguageId();
155
156 addTermQuery(
157 suggestKeywordQuery, Field.LANGUAGE_ID, languageId, null,
158 BooleanClause.Occur.MUST);
159 addTermQuery(
160 suggestKeywordQuery, Field.PORTLET_ID, PortletKeys.SEARCH, null,
161 BooleanClause.Occur.MUST);
162
163 return search(
164 indexSearcher, suggestKeywordQuery, localizedKeywordFieldName,
165 _relevancyChecker, max);
166 }
167 catch (Exception e) {
168 throw new SearchException("Unable to suggest query", e);
169 }
170 finally {
171 LuceneHelperUtil.cleanUp(indexSearcher);
172 }
173 }
174
175 protected void addNGramTermQuery(
176 BooleanQuery booleanQuery, Map<String, String> nGrams, Float boost,
177 BooleanClause.Occur occur) {
178
179 for (Map.Entry<String, String> nGramEntry : nGrams.entrySet()) {
180 String name = nGramEntry.getKey();
181 String value = nGramEntry.getValue();
182
183 addTermQuery(booleanQuery, name, value, boost, occur);
184 }
185 }
186
187 protected void addTermQuery(
188 BooleanQuery booleanQuery, String termName, String termValue,
189 Float boost, BooleanClause.Occur occur) {
190
191 Query query = new TermQuery(new Term(termName, termValue));
192
193 if (boost != null) {
194 query.setBoost(boost);
195 }
196
197 BooleanClause booleanClause = new BooleanClause(query, occur);
198
199 booleanQuery.add(booleanClause);
200 }
201
202 protected BooleanQuery buildGroupIdQuery(long[] groupIds) {
203 BooleanQuery booleanQuery = new BooleanQuery();
204
205 addTermQuery(
206 booleanQuery, Field.GROUP_ID, String.valueOf(0), null,
207 BooleanClause.Occur.SHOULD);
208
209 if ((groupIds != null) && (groupIds.length > 0)) {
210 for (long groupId : groupIds) {
211 addTermQuery(
212 booleanQuery, Field.GROUP_ID, String.valueOf(groupId), null,
213 BooleanClause.Occur.SHOULD);
214 }
215 }
216
217 return booleanQuery;
218 }
219
220 protected BooleanQuery buildNGramQuery(String word) throws SearchException {
221 NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(word);
222
223 BooleanQuery booleanQuery = new BooleanQuery();
224
225 if (_boostEnd > 0) {
226 Map<String, String> nGramEnds = nGramHolder.getNGramEnds();
227
228 addNGramTermQuery(
229 booleanQuery, nGramEnds, _boostEnd, BooleanClause.Occur.SHOULD);
230 }
231
232 Map<String, List<String>> nGrams = nGramHolder.getNGrams();
233
234 for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
235 String fieldName = entry.getKey();
236
237 for (String nGram : entry.getValue()) {
238 addTermQuery(
239 booleanQuery, fieldName, nGram, null,
240 BooleanClause.Occur.SHOULD);
241 }
242 }
243
244 if (_boostStart > 0) {
245 Map<String, String> nGramStarts = nGramHolder.getNGramStarts();
246
247 addNGramTermQuery(
248 booleanQuery, nGramStarts, _boostStart,
249 BooleanClause.Occur.SHOULD);
250 }
251
252 return booleanQuery;
253 }
254
255 protected BooleanQuery buildSpellCheckQuery(
256 long groupIds[], String word, String languageId)
257 throws SearchException {
258
259 BooleanQuery suggestWordQuery = new BooleanQuery();
260
261 BooleanQuery nGramQuery = buildNGramQuery(word);
262
263 BooleanClause booleanNGramQueryClause = new BooleanClause(
264 nGramQuery, BooleanClause.Occur.MUST);
265
266 suggestWordQuery.add(booleanNGramQueryClause);
267
268 BooleanQuery groupIdQuery = buildGroupIdQuery(groupIds);
269
270 BooleanClause groupIdQueryClause = new BooleanClause(
271 groupIdQuery, BooleanClause.Occur.MUST);
272
273 suggestWordQuery.add(groupIdQueryClause);
274
275 addTermQuery(
276 suggestWordQuery, Field.LANGUAGE_ID, languageId, null,
277 BooleanClause.Occur.MUST);
278 addTermQuery(
279 suggestWordQuery, Field.PORTLET_ID, PortletKeys.SEARCH, null,
280 BooleanClause.Occur.MUST);
281
282 return suggestWordQuery;
283 }
284
285 protected String[] search(
286 IndexSearcher indexSearcher, Query query, String fieldName,
287 RelevancyChecker relevancyChecker, int max)
288 throws IOException {
289
290 int maxScoreDocs = max * 10;
291
292 TopDocs topDocs = indexSearcher.search(query, null, maxScoreDocs);
293
294 ScoreDoc[] scoreDocs = topDocs.scoreDocs;
295
296 SuggestWordQueue suggestWordQueue = new SuggestWordQueue(
297 max, _suggestWordComparator);
298
299 int stop = Math.min(scoreDocs.length, maxScoreDocs);
300
301 for (int i = 0; i < stop; i++) {
302 SuggestWord suggestWord = new SuggestWord();
303
304 Document document = indexSearcher.doc(scoreDocs[i].doc);
305
306 Fieldable fieldable = document.getFieldable(fieldName);
307
308 suggestWord.string = fieldable.stringValue();
309
310 boolean relevant = relevancyChecker.isRelevant(suggestWord);
311
312 if (relevant) {
313 suggestWordQueue.insertWithOverflow(suggestWord);
314 }
315 }
316
317 String[] words = new String[suggestWordQueue.size()];
318
319 for (int i = suggestWordQueue.size() - 1; i >= 0; i--) {
320 SuggestWord suggestWord = suggestWordQueue.pop();
321
322 words[i] = suggestWord.string;
323 }
324
325 return words;
326 }
327
328 protected Map<String, List<String>> spellCheckKeywords(
329 List<String> keywords, String localizedFieldName,
330 SearchContext searchContext, String languageId, int max)
331 throws SearchException {
332
333 IndexSearcher indexSearcher = null;
334
335 try {
336 Map<String, List<String>> suggestions =
337 new LinkedHashMap<String, List<String>>();
338
339 float scoresThreshold = searchContext.getScoresThreshold();
340
341 if (scoresThreshold == 0) {
342 scoresThreshold = _SCORES_THRESHOLD_DEFAULT;
343 }
344
345 indexSearcher = LuceneHelperUtil.getSearcher(
346 searchContext.getCompanyId(), true);
347
348 List<IndexReader> indexReaders = new ArrayList<IndexReader>();
349
350 if (indexSearcher.maxDoc() > 0) {
351 ReaderUtil.gatherSubReaders(
352 indexReaders, indexSearcher.getIndexReader());
353 }
354
355 for (String keyword : keywords) {
356 List<String> suggestionsList = Collections.emptyList();
357
358 if (!SpellCheckerUtil.isValidWord(
359 localizedFieldName, keyword, indexReaders)) {
360
361 int frequency = indexSearcher.docFreq(
362 new Term(localizedFieldName, keyword));
363
364 String[] suggestionsArray = null;
365
366 if (frequency > 0) {
367 suggestionsArray = new String[] {keyword};
368 }
369 else {
370 BooleanQuery suggestWordQuery = buildSpellCheckQuery(
371 searchContext.getGroupIds(), keyword, languageId);
372
373 RelevancyChecker relevancyChecker =
374 new StringDistanceRelevancyChecker(
375 keyword, scoresThreshold, _stringDistance);
376
377 suggestionsArray = search(
378 indexSearcher, suggestWordQuery, localizedFieldName,
379 relevancyChecker, max);
380 }
381
382 suggestionsList = Arrays.asList(suggestionsArray);
383 }
384
385 suggestions.put(keyword, suggestionsList);
386 }
387
388 return suggestions;
389 }
390 catch (IOException ioe) {
391 throw new SearchException("Unable to find suggestions", ioe);
392 }
393 finally {
394 LuceneHelperUtil.cleanUp(indexSearcher);
395 }
396 }
397
398 private static final float _SCORES_THRESHOLD_DEFAULT = 0.5f;
399
400 private float _boostEnd = 1.0f;
401 private float _boostStart = 2.0f;
402 private RelevancyChecker _relevancyChecker = new DefaultRelevancyChecker();
403 private StringDistance _stringDistance;
404 private Comparator<SuggestWord> _suggestWordComparator =
405 SuggestWordQueue.DEFAULT_COMPARATOR;
406
407 }