001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.liferay.portal.kernel.search.BaseQuerySuggester;
018 import com.liferay.portal.kernel.search.DocumentImpl;
019 import com.liferay.portal.kernel.search.Field;
020 import com.liferay.portal.kernel.search.NGramHolder;
021 import com.liferay.portal.kernel.search.NGramHolderBuilderUtil;
022 import com.liferay.portal.kernel.search.SearchContext;
023 import com.liferay.portal.kernel.search.SearchException;
024 import com.liferay.portal.kernel.search.TokenizerUtil;
025 import com.liferay.portal.kernel.util.ArrayUtil;
026 import com.liferay.portal.util.PortletKeys;
027 import com.liferay.util.lucene.KeywordsUtil;
028
029 import java.io.IOException;
030
031 import java.util.ArrayList;
032 import java.util.Arrays;
033 import java.util.Collections;
034 import java.util.Comparator;
035 import java.util.LinkedHashMap;
036 import java.util.List;
037 import java.util.Map;
038
039 import org.apache.lucene.document.Document;
040 import org.apache.lucene.document.Fieldable;
041 import org.apache.lucene.index.IndexReader;
042 import org.apache.lucene.index.Term;
043 import org.apache.lucene.queryParser.ParseException;
044 import org.apache.lucene.queryParser.QueryParser;
045 import org.apache.lucene.search.BooleanClause;
046 import org.apache.lucene.search.BooleanQuery;
047 import org.apache.lucene.search.IndexSearcher;
048 import org.apache.lucene.search.Query;
049 import org.apache.lucene.search.ScoreDoc;
050 import org.apache.lucene.search.TermQuery;
051 import org.apache.lucene.search.TopDocs;
052 import org.apache.lucene.search.spell.StringDistance;
053 import org.apache.lucene.search.spell.SuggestWord;
054 import org.apache.lucene.search.spell.SuggestWordQueue;
055 import org.apache.lucene.util.ReaderUtil;
056
057
060 public class LuceneQuerySuggester extends BaseQuerySuggester {
061
062 public void setBoostEnd(float boostEnd) {
063 _boostEnd = boostEnd;
064 }
065
066 public void setBoostStart(float boostStart) {
067 _boostStart = boostStart;
068 }
069
070 public void setStringDistance(StringDistance stringDistance) {
071 _stringDistance = stringDistance;
072 }
073
074 public void setSuggestWordComparator(
075 Comparator<SuggestWord> suggestWordComparator) {
076
077 _suggestWordComparator = suggestWordComparator;
078 }
079
080 @Override
081 public Map<String, List<String>> spellCheckKeywords(
082 SearchContext searchContext, int max)
083 throws SearchException {
084
085 String languageId = searchContext.getLanguageId();
086
087 String localizedFieldName = DocumentImpl.getLocalizedName(
088 languageId, Field.SPELL_CHECK_WORD);
089
090 List<String> keywords = TokenizerUtil.tokenize(
091 localizedFieldName, searchContext.getKeywords(), languageId);
092
093 return spellCheckKeywords(
094 keywords, localizedFieldName, searchContext, languageId, max);
095 }
096
097 @Override
098 public String[] suggestKeywordQueries(SearchContext searchContext, int max)
099 throws SearchException {
100
101 IndexSearcher indexSearcher = null;
102
103 try {
104 indexSearcher = LuceneHelperUtil.getSearcher(
105 searchContext.getCompanyId(), true);
106
107 BooleanQuery suggestKeywordQuery = new BooleanQuery();
108
109 addTermQuery(
110 suggestKeywordQuery, Field.COMPANY_ID,
111 String.valueOf(searchContext.getCompanyId()), null,
112 BooleanClause.Occur.MUST);
113
114 String localizedKeywordFieldName = DocumentImpl.getLocalizedName(
115 searchContext.getLanguageId(), Field.KEYWORD_SEARCH);
116
117 QueryParser queryParser = new QueryParser(
118 LuceneHelperUtil.getVersion(), localizedKeywordFieldName,
119 LuceneHelperUtil.getAnalyzer());
120
121 Query query = null;
122
123 try {
124 query = queryParser.parse(searchContext.getKeywords());
125 }
126 catch (ParseException e) {
127 query = queryParser.parse(
128 KeywordsUtil.escape(searchContext.getKeywords()));
129 }
130
131 BooleanClause keywordTermQuery = new BooleanClause(
132 query, BooleanClause.Occur.MUST);
133
134 suggestKeywordQuery.add(keywordTermQuery);
135
136 String languageId = searchContext.getLanguageId();
137
138 addTermQuery(
139 suggestKeywordQuery, Field.LANGUAGE_ID, languageId, null,
140 BooleanClause.Occur.MUST);
141 addTermQuery(
142 suggestKeywordQuery, Field.PORTLET_ID, PortletKeys.SEARCH, null,
143 BooleanClause.Occur.MUST);
144
145 return search(
146 indexSearcher, suggestKeywordQuery, localizedKeywordFieldName,
147 _relevancyChecker, max);
148 }
149 catch (Exception e) {
150 throw new SearchException("Unable to suggest query", e);
151 }
152 finally {
153 LuceneHelperUtil.cleanUp(indexSearcher);
154 }
155 }
156
157 protected void addNGramTermQuery(
158 BooleanQuery booleanQuery, Map<String, String> nGrams, Float boost,
159 BooleanClause.Occur occur) {
160
161 for (Map.Entry<String, String> nGramEntry : nGrams.entrySet()) {
162 String name = nGramEntry.getKey();
163 String value = nGramEntry.getValue();
164
165 addTermQuery(booleanQuery, name, value, boost, occur);
166 }
167 }
168
169 protected void addTermQuery(
170 BooleanQuery booleanQuery, String termName, String termValue,
171 Float boost, BooleanClause.Occur occur) {
172
173 Query query = new TermQuery(new Term(termName, termValue));
174
175 if (boost != null) {
176 query.setBoost(boost);
177 }
178
179 BooleanClause booleanClause = new BooleanClause(query, occur);
180
181 booleanQuery.add(booleanClause);
182 }
183
184 protected BooleanQuery buildGroupIdQuery(long[] groupIds) {
185 BooleanQuery booleanQuery = new BooleanQuery();
186
187 addTermQuery(
188 booleanQuery, Field.GROUP_ID, String.valueOf(0), null,
189 BooleanClause.Occur.SHOULD);
190
191 if (ArrayUtil.isNotEmpty(groupIds)) {
192 for (long groupId : groupIds) {
193 addTermQuery(
194 booleanQuery, Field.GROUP_ID, String.valueOf(groupId), null,
195 BooleanClause.Occur.SHOULD);
196 }
197 }
198
199 return booleanQuery;
200 }
201
202 protected BooleanQuery buildNGramQuery(String word) throws SearchException {
203 NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(word);
204
205 BooleanQuery booleanQuery = new BooleanQuery();
206
207 if (_boostEnd > 0) {
208 Map<String, String> nGramEnds = nGramHolder.getNGramEnds();
209
210 addNGramTermQuery(
211 booleanQuery, nGramEnds, _boostEnd, BooleanClause.Occur.SHOULD);
212 }
213
214 Map<String, List<String>> nGrams = nGramHolder.getNGrams();
215
216 for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
217 String fieldName = entry.getKey();
218
219 for (String nGram : entry.getValue()) {
220 addTermQuery(
221 booleanQuery, fieldName, nGram, null,
222 BooleanClause.Occur.SHOULD);
223 }
224 }
225
226 if (_boostStart > 0) {
227 Map<String, String> nGramStarts = nGramHolder.getNGramStarts();
228
229 addNGramTermQuery(
230 booleanQuery, nGramStarts, _boostStart,
231 BooleanClause.Occur.SHOULD);
232 }
233
234 return booleanQuery;
235 }
236
237 protected BooleanQuery buildSpellCheckQuery(
238 long groupIds[], String word, String languageId)
239 throws SearchException {
240
241 BooleanQuery suggestWordQuery = new BooleanQuery();
242
243 BooleanQuery nGramQuery = buildNGramQuery(word);
244
245 BooleanClause booleanNGramQueryClause = new BooleanClause(
246 nGramQuery, BooleanClause.Occur.MUST);
247
248 suggestWordQuery.add(booleanNGramQueryClause);
249
250 BooleanQuery groupIdQuery = buildGroupIdQuery(groupIds);
251
252 BooleanClause groupIdQueryClause = new BooleanClause(
253 groupIdQuery, BooleanClause.Occur.MUST);
254
255 suggestWordQuery.add(groupIdQueryClause);
256
257 addTermQuery(
258 suggestWordQuery, Field.LANGUAGE_ID, languageId, null,
259 BooleanClause.Occur.MUST);
260 addTermQuery(
261 suggestWordQuery, Field.PORTLET_ID, PortletKeys.SEARCH, null,
262 BooleanClause.Occur.MUST);
263
264 return suggestWordQuery;
265 }
266
267 protected String[] search(
268 IndexSearcher indexSearcher, Query query, String fieldName,
269 RelevancyChecker relevancyChecker, int max)
270 throws IOException {
271
272 int maxScoreDocs = max * 10;
273
274 TopDocs topDocs = indexSearcher.search(query, null, maxScoreDocs);
275
276 ScoreDoc[] scoreDocs = topDocs.scoreDocs;
277
278 SuggestWordQueue suggestWordQueue = new SuggestWordQueue(
279 max, _suggestWordComparator);
280
281 int stop = Math.min(scoreDocs.length, maxScoreDocs);
282
283 for (int i = 0; i < stop; i++) {
284 SuggestWord suggestWord = new SuggestWord();
285
286 Document document = indexSearcher.doc(scoreDocs[i].doc);
287
288 Fieldable fieldable = document.getFieldable(fieldName);
289
290 suggestWord.string = fieldable.stringValue();
291
292 boolean relevant = relevancyChecker.isRelevant(suggestWord);
293
294 if (relevant) {
295 suggestWordQueue.insertWithOverflow(suggestWord);
296 }
297 }
298
299 String[] words = new String[suggestWordQueue.size()];
300
301 for (int i = suggestWordQueue.size() - 1; i >= 0; i--) {
302 SuggestWord suggestWord = suggestWordQueue.pop();
303
304 words[i] = suggestWord.string;
305 }
306
307 return words;
308 }
309
310 protected Map<String, List<String>> spellCheckKeywords(
311 List<String> keywords, String localizedFieldName,
312 SearchContext searchContext, String languageId, int max)
313 throws SearchException {
314
315 IndexSearcher indexSearcher = null;
316
317 try {
318 Map<String, List<String>> suggestions =
319 new LinkedHashMap<String, List<String>>();
320
321 float scoresThreshold = searchContext.getScoresThreshold();
322
323 if (scoresThreshold == 0) {
324 scoresThreshold = _SCORES_THRESHOLD_DEFAULT;
325 }
326
327 indexSearcher = LuceneHelperUtil.getSearcher(
328 searchContext.getCompanyId(), true);
329
330 List<IndexReader> indexReaders = new ArrayList<IndexReader>();
331
332 if (indexSearcher.maxDoc() > 0) {
333 ReaderUtil.gatherSubReaders(
334 indexReaders, indexSearcher.getIndexReader());
335 }
336
337 for (String keyword : keywords) {
338 List<String> suggestionsList = Collections.emptyList();
339
340 if (!SpellCheckerUtil.isValidWord(
341 localizedFieldName, keyword, indexReaders)) {
342
343 int frequency = indexSearcher.docFreq(
344 new Term(localizedFieldName, keyword));
345
346 String[] suggestionsArray = null;
347
348 if (frequency > 0) {
349 suggestionsArray = new String[] {keyword};
350 }
351 else {
352 BooleanQuery suggestWordQuery = buildSpellCheckQuery(
353 searchContext.getGroupIds(), keyword, languageId);
354
355 RelevancyChecker relevancyChecker =
356 new StringDistanceRelevancyChecker(
357 keyword, scoresThreshold, _stringDistance);
358
359 suggestionsArray = search(
360 indexSearcher, suggestWordQuery, localizedFieldName,
361 relevancyChecker, max);
362 }
363
364 suggestionsList = Arrays.asList(suggestionsArray);
365 }
366
367 suggestions.put(keyword, suggestionsList);
368 }
369
370 return suggestions;
371 }
372 catch (IOException ioe) {
373 throw new SearchException("Unable to find suggestions", ioe);
374 }
375 finally {
376 LuceneHelperUtil.cleanUp(indexSearcher);
377 }
378 }
379
380 private static final float _SCORES_THRESHOLD_DEFAULT = 0.5f;
381
382 private float _boostEnd = 1.0f;
383 private float _boostStart = 2.0f;
384 private RelevancyChecker _relevancyChecker = new DefaultRelevancyChecker();
385 private StringDistance _stringDistance;
386 private Comparator<SuggestWord> _suggestWordComparator =
387 SuggestWordQueue.DEFAULT_COMPARATOR;
388
389 }