001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.liferay.portal.kernel.search.BaseQuerySuggester;
018 import com.liferay.portal.kernel.search.DocumentImpl;
019 import com.liferay.portal.kernel.search.Field;
020 import com.liferay.portal.kernel.search.NGramHolder;
021 import com.liferay.portal.kernel.search.NGramHolderBuilderUtil;
022 import com.liferay.portal.kernel.search.SearchContext;
023 import com.liferay.portal.kernel.search.SearchException;
024 import com.liferay.portal.kernel.search.SuggestionConstants;
025 import com.liferay.portal.kernel.search.TokenizerUtil;
026 import com.liferay.portal.kernel.util.ArrayUtil;
027 import com.liferay.portal.util.PortletKeys;
028
029 import java.io.IOException;
030
031 import java.util.ArrayList;
032 import java.util.Arrays;
033 import java.util.Collections;
034 import java.util.Comparator;
035 import java.util.LinkedHashMap;
036 import java.util.List;
037 import java.util.Map;
038
039 import org.apache.lucene.document.Document;
040 import org.apache.lucene.document.Fieldable;
041 import org.apache.lucene.index.IndexReader;
042 import org.apache.lucene.index.Term;
043 import org.apache.lucene.search.BooleanClause;
044 import org.apache.lucene.search.BooleanQuery;
045 import org.apache.lucene.search.IndexSearcher;
046 import org.apache.lucene.search.Query;
047 import org.apache.lucene.search.ScoreDoc;
048 import org.apache.lucene.search.TermQuery;
049 import org.apache.lucene.search.TopDocs;
050 import org.apache.lucene.search.spell.StringDistance;
051 import org.apache.lucene.search.spell.SuggestWord;
052 import org.apache.lucene.search.spell.SuggestWordQueue;
053 import org.apache.lucene.util.ReaderUtil;
054
055
058 public class LuceneQuerySuggester extends BaseQuerySuggester {
059
060 public void setBoostEnd(float boostEnd) {
061 _boostEnd = boostEnd;
062 }
063
064 public void setBoostStart(float boostStart) {
065 _boostStart = boostStart;
066 }
067
068 public void setQuerySuggestionMaxNGramLength(
069 int querySuggestionMaxNGramLength) {
070
071 _querySuggestionMaxNGramLength = querySuggestionMaxNGramLength;
072 }
073
074 public void setStringDistance(StringDistance stringDistance) {
075 _stringDistance = stringDistance;
076 }
077
078 public void setSuggestWordComparator(
079 Comparator<SuggestWord> suggestWordComparator) {
080
081 _suggestWordComparator = suggestWordComparator;
082 }
083
084 @Override
085 public Map<String, List<String>> spellCheckKeywords(
086 SearchContext searchContext, int max)
087 throws SearchException {
088
089 String languageId = searchContext.getLanguageId();
090
091 String localizedFieldName = DocumentImpl.getLocalizedName(
092 languageId, Field.SPELL_CHECK_WORD);
093
094 List<String> keywords = TokenizerUtil.tokenize(
095 localizedFieldName, searchContext.getKeywords(), languageId);
096
097 return spellCheckKeywords(
098 keywords, localizedFieldName, searchContext, languageId, max);
099 }
100
101 @Override
102 public String[] suggestKeywordQueries(SearchContext searchContext, int max)
103 throws SearchException {
104
105 IndexSearcher indexSearcher = null;
106
107 try {
108 indexSearcher = LuceneHelperUtil.getSearcher(
109 searchContext.getCompanyId(), true);
110
111 String localizedKeywordFieldName = DocumentImpl.getLocalizedName(
112 searchContext.getLanguageId(), Field.KEYWORD_SEARCH);
113
114 BooleanQuery suggestKeywordQuery = buildSpellCheckQuery(
115 searchContext.getGroupIds(), searchContext.getKeywords(),
116 searchContext.getLanguageId(),
117 SuggestionConstants.TYPE_QUERY_SUGGESTION,
118 _querySuggestionMaxNGramLength);
119
120 return search(
121 indexSearcher, suggestKeywordQuery, localizedKeywordFieldName,
122 _relevancyChecker, max);
123 }
124 catch (Exception e) {
125 throw new SearchException("Unable to suggest query", e);
126 }
127 finally {
128 LuceneHelperUtil.cleanUp(indexSearcher);
129 }
130 }
131
132 protected void addNGramTermQuery(
133 BooleanQuery booleanQuery, Map<String, String> nGrams, Float boost,
134 BooleanClause.Occur occur) {
135
136 for (Map.Entry<String, String> nGramEntry : nGrams.entrySet()) {
137 String name = nGramEntry.getKey();
138 String value = nGramEntry.getValue();
139
140 addTermQuery(booleanQuery, name, value, boost, occur);
141 }
142 }
143
144 protected void addTermQuery(
145 BooleanQuery booleanQuery, String termName, String termValue,
146 Float boost, BooleanClause.Occur occur) {
147
148 Query query = new TermQuery(new Term(termName, termValue));
149
150 if (boost != null) {
151 query.setBoost(boost);
152 }
153
154 BooleanClause booleanClause = new BooleanClause(query, occur);
155
156 booleanQuery.add(booleanClause);
157 }
158
159 protected BooleanQuery buildGroupIdQuery(long[] groupIds) {
160 BooleanQuery booleanQuery = new BooleanQuery();
161
162 addTermQuery(
163 booleanQuery, Field.GROUP_ID, String.valueOf(0), null,
164 BooleanClause.Occur.SHOULD);
165
166 if (ArrayUtil.isNotEmpty(groupIds)) {
167 for (long groupId : groupIds) {
168 addTermQuery(
169 booleanQuery, Field.GROUP_ID, String.valueOf(groupId), null,
170 BooleanClause.Occur.SHOULD);
171 }
172 }
173
174 return booleanQuery;
175 }
176
177 protected BooleanQuery buildNGramQuery(String word, int maxNGramLength)
178 throws SearchException {
179
180 NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
181 word, maxNGramLength);
182
183 BooleanQuery booleanQuery = new BooleanQuery();
184
185 if (_boostEnd > 0) {
186 Map<String, String> nGramEnds = nGramHolder.getNGramEnds();
187
188 addNGramTermQuery(
189 booleanQuery, nGramEnds, _boostEnd, BooleanClause.Occur.SHOULD);
190 }
191
192 Map<String, List<String>> nGrams = nGramHolder.getNGrams();
193
194 for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
195 String fieldName = entry.getKey();
196
197 for (String nGram : entry.getValue()) {
198 addTermQuery(
199 booleanQuery, fieldName, nGram, null,
200 BooleanClause.Occur.SHOULD);
201 }
202 }
203
204 if (_boostStart > 0) {
205 Map<String, String> nGramStarts = nGramHolder.getNGramStarts();
206
207 addNGramTermQuery(
208 booleanQuery, nGramStarts, _boostStart,
209 BooleanClause.Occur.SHOULD);
210 }
211
212 return booleanQuery;
213 }
214
215 protected BooleanQuery buildSpellCheckQuery(
216 long groupIds[], String word, String languageId,
217 String typeFieldValue, int maxNGramLength)
218 throws SearchException {
219
220 BooleanQuery suggestWordQuery = new BooleanQuery();
221
222 BooleanQuery nGramQuery = buildNGramQuery(word, maxNGramLength);
223
224 BooleanClause booleanNGramQueryClause = new BooleanClause(
225 nGramQuery, BooleanClause.Occur.MUST);
226
227 suggestWordQuery.add(booleanNGramQueryClause);
228
229 BooleanQuery groupIdQuery = buildGroupIdQuery(groupIds);
230
231 BooleanClause groupIdQueryClause = new BooleanClause(
232 groupIdQuery, BooleanClause.Occur.MUST);
233
234 suggestWordQuery.add(groupIdQueryClause);
235
236 addTermQuery(
237 suggestWordQuery, Field.LANGUAGE_ID, languageId, null,
238 BooleanClause.Occur.MUST);
239 addTermQuery(
240 suggestWordQuery, Field.PORTLET_ID, PortletKeys.SEARCH, null,
241 BooleanClause.Occur.MUST);
242 addTermQuery(
243 suggestWordQuery, Field.TYPE, typeFieldValue, null,
244 BooleanClause.Occur.MUST);
245
246 return suggestWordQuery;
247 }
248
249 protected String[] search(
250 IndexSearcher indexSearcher, Query query, String fieldName,
251 RelevancyChecker relevancyChecker, int max)
252 throws IOException {
253
254 int maxScoreDocs = max * 10;
255
256 TopDocs topDocs = indexSearcher.search(query, null, maxScoreDocs);
257
258 ScoreDoc[] scoreDocs = topDocs.scoreDocs;
259
260 SuggestWordQueue suggestWordQueue = new SuggestWordQueue(
261 max, _suggestWordComparator);
262
263 int stop = Math.min(scoreDocs.length, maxScoreDocs);
264
265 for (int i = 0; i < stop; i++) {
266 SuggestWord suggestWord = new SuggestWord();
267
268 Document document = indexSearcher.doc(scoreDocs[i].doc);
269
270 Fieldable fieldable = document.getFieldable(fieldName);
271
272 suggestWord.string = fieldable.stringValue();
273
274 boolean relevant = relevancyChecker.isRelevant(suggestWord);
275
276 if (relevant) {
277 suggestWordQueue.insertWithOverflow(suggestWord);
278 }
279 }
280
281 String[] words = new String[suggestWordQueue.size()];
282
283 for (int i = suggestWordQueue.size() - 1; i >= 0; i--) {
284 SuggestWord suggestWord = suggestWordQueue.pop();
285
286 words[i] = suggestWord.string;
287 }
288
289 return words;
290 }
291
292 protected Map<String, List<String>> spellCheckKeywords(
293 List<String> keywords, String localizedFieldName,
294 SearchContext searchContext, String languageId, int max)
295 throws SearchException {
296
297 IndexSearcher indexSearcher = null;
298
299 try {
300 Map<String, List<String>> suggestions =
301 new LinkedHashMap<String, List<String>>();
302
303 float scoresThreshold = searchContext.getScoresThreshold();
304
305 if (scoresThreshold == 0) {
306 scoresThreshold = _SCORES_THRESHOLD_DEFAULT;
307 }
308
309 indexSearcher = LuceneHelperUtil.getSearcher(
310 searchContext.getCompanyId(), true);
311
312 List<IndexReader> indexReaders = new ArrayList<IndexReader>();
313
314 if (indexSearcher.maxDoc() > 0) {
315 ReaderUtil.gatherSubReaders(
316 indexReaders, indexSearcher.getIndexReader());
317 }
318
319 for (String keyword : keywords) {
320 List<String> suggestionsList = Collections.emptyList();
321
322 if (!SpellCheckerUtil.isValidWord(
323 localizedFieldName, keyword, indexReaders)) {
324
325 int frequency = indexSearcher.docFreq(
326 new Term(localizedFieldName, keyword));
327
328 String[] suggestionsArray = null;
329
330 if (frequency > 0) {
331 suggestionsArray = new String[] {keyword};
332 }
333 else {
334 BooleanQuery suggestWordQuery = buildSpellCheckQuery(
335 searchContext.getGroupIds(), keyword, languageId,
336 SuggestionConstants.TYPE_SPELL_CHECKER, 0);
337
338 RelevancyChecker relevancyChecker =
339 new StringDistanceRelevancyChecker(
340 keyword, scoresThreshold, _stringDistance);
341
342 suggestionsArray = search(
343 indexSearcher, suggestWordQuery, localizedFieldName,
344 relevancyChecker, max);
345 }
346
347 suggestionsList = Arrays.asList(suggestionsArray);
348 }
349
350 suggestions.put(keyword, suggestionsList);
351 }
352
353 return suggestions;
354 }
355 catch (IOException ioe) {
356 throw new SearchException("Unable to find suggestions", ioe);
357 }
358 finally {
359 LuceneHelperUtil.cleanUp(indexSearcher);
360 }
361 }
362
363 private static final float _SCORES_THRESHOLD_DEFAULT = 0.5f;
364
365 private float _boostEnd = 1.0f;
366 private float _boostStart = 2.0f;
367 private int _querySuggestionMaxNGramLength = 50;
368 private RelevancyChecker _relevancyChecker = new DefaultRelevancyChecker();
369 private StringDistance _stringDistance;
370 private Comparator<SuggestWord> _suggestWordComparator =
371 SuggestWordQueue.DEFAULT_COMPARATOR;
372
373 }