001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import com.liferay.portal.kernel.search.BaseQuerySuggester;
018    import com.liferay.portal.kernel.search.DocumentImpl;
019    import com.liferay.portal.kernel.search.Field;
020    import com.liferay.portal.kernel.search.NGramHolder;
021    import com.liferay.portal.kernel.search.NGramHolderBuilderUtil;
022    import com.liferay.portal.kernel.search.SearchContext;
023    import com.liferay.portal.kernel.search.SearchException;
024    import com.liferay.portal.kernel.search.SuggestionConstants;
025    import com.liferay.portal.kernel.search.TokenizerUtil;
026    import com.liferay.portal.kernel.util.ArrayUtil;
027    import com.liferay.portal.util.PortletKeys;
028    
029    import java.io.IOException;
030    
031    import java.util.ArrayList;
032    import java.util.Arrays;
033    import java.util.Collections;
034    import java.util.Comparator;
035    import java.util.LinkedHashMap;
036    import java.util.List;
037    import java.util.Map;
038    
039    import org.apache.lucene.document.Document;
040    import org.apache.lucene.document.Fieldable;
041    import org.apache.lucene.index.IndexReader;
042    import org.apache.lucene.index.Term;
043    import org.apache.lucene.search.BooleanClause;
044    import org.apache.lucene.search.BooleanQuery;
045    import org.apache.lucene.search.IndexSearcher;
046    import org.apache.lucene.search.Query;
047    import org.apache.lucene.search.ScoreDoc;
048    import org.apache.lucene.search.TermQuery;
049    import org.apache.lucene.search.TopDocs;
050    import org.apache.lucene.search.spell.StringDistance;
051    import org.apache.lucene.search.spell.SuggestWord;
052    import org.apache.lucene.search.spell.SuggestWordQueue;
053    import org.apache.lucene.util.ReaderUtil;
054    
055    /**
056     * @author Michael C. Han
057     */
058    public class LuceneQuerySuggester extends BaseQuerySuggester {
059    
060            public void setBoostEnd(float boostEnd) {
061                    _boostEnd = boostEnd;
062            }
063    
064            public void setBoostStart(float boostStart) {
065                    _boostStart = boostStart;
066            }
067    
068            public void setQuerySuggestionMaxNGramLength(
069                    int querySuggestionMaxNGramLength) {
070    
071                    _querySuggestionMaxNGramLength = querySuggestionMaxNGramLength;
072            }
073    
074            public void setStringDistance(StringDistance stringDistance) {
075                    _stringDistance = stringDistance;
076            }
077    
078            public void setSuggestWordComparator(
079                    Comparator<SuggestWord> suggestWordComparator) {
080    
081                    _suggestWordComparator = suggestWordComparator;
082            }
083    
084            @Override
085            public Map<String, List<String>> spellCheckKeywords(
086                            SearchContext searchContext, int max)
087                    throws SearchException {
088    
089                    String languageId = searchContext.getLanguageId();
090    
091                    String localizedFieldName = DocumentImpl.getLocalizedName(
092                            languageId, Field.SPELL_CHECK_WORD);
093    
094                    List<String> keywords = TokenizerUtil.tokenize(
095                            localizedFieldName, searchContext.getKeywords(), languageId);
096    
097                    return spellCheckKeywords(
098                            keywords, localizedFieldName, searchContext, languageId, max);
099            }
100    
101            @Override
102            public String[] suggestKeywordQueries(SearchContext searchContext, int max)
103                    throws SearchException {
104    
105                    IndexSearcher indexSearcher = null;
106    
107                    try {
108                            indexSearcher = LuceneHelperUtil.getSearcher(
109                                    searchContext.getCompanyId(), true);
110    
111                            String localizedKeywordFieldName = DocumentImpl.getLocalizedName(
112                                    searchContext.getLanguageId(), Field.KEYWORD_SEARCH);
113    
114                            BooleanQuery suggestKeywordQuery = buildSpellCheckQuery(
115                                    searchContext.getGroupIds(), searchContext.getKeywords(),
116                                    searchContext.getLanguageId(),
117                                    SuggestionConstants.TYPE_QUERY_SUGGESTION,
118                                    _querySuggestionMaxNGramLength);
119    
120                            return search(
121                                    indexSearcher, suggestKeywordQuery, localizedKeywordFieldName,
122                                    _relevancyChecker, max);
123                    }
124                    catch (Exception e) {
125                            throw new SearchException("Unable to suggest query", e);
126                    }
127                    finally {
128                            LuceneHelperUtil.cleanUp(indexSearcher);
129                    }
130            }
131    
132            protected void addNGramTermQuery(
133                    BooleanQuery booleanQuery, Map<String, String> nGrams, Float boost,
134                    BooleanClause.Occur occur) {
135    
136                    for (Map.Entry<String, String> nGramEntry : nGrams.entrySet()) {
137                            String name = nGramEntry.getKey();
138                            String value = nGramEntry.getValue();
139    
140                            addTermQuery(booleanQuery, name, value, boost, occur);
141                    }
142            }
143    
144            protected void addTermQuery(
145                    BooleanQuery booleanQuery, String termName, String termValue,
146                    Float boost, BooleanClause.Occur occur) {
147    
148                    Query query = new TermQuery(new Term(termName, termValue));
149    
150                    if (boost != null) {
151                            query.setBoost(boost);
152                    }
153    
154                    BooleanClause booleanClause = new BooleanClause(query, occur);
155    
156                    booleanQuery.add(booleanClause);
157            }
158    
159            protected BooleanQuery buildGroupIdQuery(long[] groupIds) {
160                    BooleanQuery booleanQuery = new BooleanQuery();
161    
162                    addTermQuery(
163                            booleanQuery, Field.GROUP_ID, String.valueOf(0), null,
164                            BooleanClause.Occur.SHOULD);
165    
166                    if (ArrayUtil.isNotEmpty(groupIds)) {
167                            for (long groupId : groupIds) {
168                                    addTermQuery(
169                                            booleanQuery, Field.GROUP_ID, String.valueOf(groupId), null,
170                                            BooleanClause.Occur.SHOULD);
171                            }
172                    }
173    
174                    return booleanQuery;
175            }
176    
177            protected BooleanQuery buildNGramQuery(String word, int maxNGramLength)
178                    throws SearchException {
179    
180                    NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
181                            word, maxNGramLength);
182    
183                    BooleanQuery booleanQuery = new BooleanQuery();
184    
185                    if (_boostEnd > 0) {
186                            Map<String, String> nGramEnds = nGramHolder.getNGramEnds();
187    
188                            addNGramTermQuery(
189                                    booleanQuery, nGramEnds, _boostEnd, BooleanClause.Occur.SHOULD);
190                    }
191    
192                    Map<String, List<String>> nGrams = nGramHolder.getNGrams();
193    
194                    for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
195                            String fieldName = entry.getKey();
196    
197                            for (String nGram : entry.getValue()) {
198                                    addTermQuery(
199                                            booleanQuery, fieldName, nGram, null,
200                                            BooleanClause.Occur.SHOULD);
201                            }
202                    }
203    
204                    if (_boostStart > 0) {
205                            Map<String, String> nGramStarts = nGramHolder.getNGramStarts();
206    
207                            addNGramTermQuery(
208                                    booleanQuery, nGramStarts, _boostStart,
209                                    BooleanClause.Occur.SHOULD);
210                    }
211    
212                    return booleanQuery;
213            }
214    
215            protected BooleanQuery buildSpellCheckQuery(
216                            long groupIds[], String word, String languageId,
217                            String typeFieldValue, int maxNGramLength)
218                    throws SearchException {
219    
220                    BooleanQuery suggestWordQuery = new BooleanQuery();
221    
222                    BooleanQuery nGramQuery = buildNGramQuery(word, maxNGramLength);
223    
224                    BooleanClause booleanNGramQueryClause = new BooleanClause(
225                            nGramQuery, BooleanClause.Occur.MUST);
226    
227                    suggestWordQuery.add(booleanNGramQueryClause);
228    
229                    BooleanQuery groupIdQuery = buildGroupIdQuery(groupIds);
230    
231                    BooleanClause groupIdQueryClause = new BooleanClause(
232                            groupIdQuery, BooleanClause.Occur.MUST);
233    
234                    suggestWordQuery.add(groupIdQueryClause);
235    
236                    addTermQuery(
237                            suggestWordQuery, Field.LANGUAGE_ID, languageId, null,
238                            BooleanClause.Occur.MUST);
239                    addTermQuery(
240                            suggestWordQuery, Field.PORTLET_ID, PortletKeys.SEARCH, null,
241                            BooleanClause.Occur.MUST);
242                    addTermQuery(
243                            suggestWordQuery, Field.TYPE, typeFieldValue, null,
244                            BooleanClause.Occur.MUST);
245    
246                    return suggestWordQuery;
247            }
248    
249            protected String[] search(
250                            IndexSearcher indexSearcher, Query query, String fieldName,
251                            RelevancyChecker relevancyChecker, int max)
252                    throws IOException {
253    
254                    int maxScoreDocs = max * 10;
255    
256                    TopDocs topDocs = indexSearcher.search(query, null, maxScoreDocs);
257    
258                    ScoreDoc[] scoreDocs = topDocs.scoreDocs;
259    
260                    SuggestWordQueue suggestWordQueue = new SuggestWordQueue(
261                            max, _suggestWordComparator);
262    
263                    int stop = Math.min(scoreDocs.length, maxScoreDocs);
264    
265                    for (int i = 0; i < stop; i++) {
266                            SuggestWord suggestWord = new SuggestWord();
267    
268                            Document document = indexSearcher.doc(scoreDocs[i].doc);
269    
270                            Fieldable fieldable = document.getFieldable(fieldName);
271    
272                            suggestWord.string = fieldable.stringValue();
273    
274                            boolean relevant = relevancyChecker.isRelevant(suggestWord);
275    
276                            if (relevant) {
277                                    suggestWordQueue.insertWithOverflow(suggestWord);
278                            }
279                    }
280    
281                    String[] words = new String[suggestWordQueue.size()];
282    
283                    for (int i = suggestWordQueue.size() - 1; i >= 0; i--) {
284                            SuggestWord suggestWord = suggestWordQueue.pop();
285    
286                            words[i] = suggestWord.string;
287                    }
288    
289                    return words;
290            }
291    
292            protected Map<String, List<String>> spellCheckKeywords(
293                            List<String> keywords, String localizedFieldName,
294                            SearchContext searchContext, String languageId, int max)
295                    throws SearchException {
296    
297                    IndexSearcher indexSearcher = null;
298    
299                    try {
300                            Map<String, List<String>> suggestions =
301                                    new LinkedHashMap<String, List<String>>();
302    
303                            float scoresThreshold = searchContext.getScoresThreshold();
304    
305                            if (scoresThreshold == 0) {
306                                    scoresThreshold = _SCORES_THRESHOLD_DEFAULT;
307                            }
308    
309                            indexSearcher = LuceneHelperUtil.getSearcher(
310                                    searchContext.getCompanyId(), true);
311    
312                            List<IndexReader> indexReaders = new ArrayList<IndexReader>();
313    
314                            if (indexSearcher.maxDoc() > 0) {
315                                    ReaderUtil.gatherSubReaders(
316                                            indexReaders, indexSearcher.getIndexReader());
317                            }
318    
319                            for (String keyword : keywords) {
320                                    List<String> suggestionsList = Collections.emptyList();
321    
322                                    if (!SpellCheckerUtil.isValidWord(
323                                                    localizedFieldName, keyword, indexReaders)) {
324    
325                                            int frequency = indexSearcher.docFreq(
326                                                    new Term(localizedFieldName, keyword));
327    
328                                            String[] suggestionsArray = null;
329    
330                                            if (frequency > 0) {
331                                                    suggestionsArray = new String[] {keyword};
332                                            }
333                                            else {
334                                                    BooleanQuery suggestWordQuery = buildSpellCheckQuery(
335                                                            searchContext.getGroupIds(), keyword, languageId,
336                                                            SuggestionConstants.TYPE_SPELL_CHECKER, 0);
337    
338                                                    RelevancyChecker relevancyChecker =
339                                                            new StringDistanceRelevancyChecker(
340                                                                    keyword, scoresThreshold, _stringDistance);
341    
342                                                    suggestionsArray = search(
343                                                            indexSearcher, suggestWordQuery, localizedFieldName,
344                                                            relevancyChecker, max);
345                                            }
346    
347                                            suggestionsList = Arrays.asList(suggestionsArray);
348                                    }
349    
350                                    suggestions.put(keyword, suggestionsList);
351                            }
352    
353                            return suggestions;
354                    }
355                    catch (IOException ioe) {
356                            throw new SearchException("Unable to find suggestions", ioe);
357                    }
358                    finally {
359                            LuceneHelperUtil.cleanUp(indexSearcher);
360                    }
361            }
362    
363            private static final float _SCORES_THRESHOLD_DEFAULT = 0.5f;
364    
365            private float _boostEnd = 1.0f;
366            private float _boostStart = 2.0f;
367            private int _querySuggestionMaxNGramLength = 50;
368            private RelevancyChecker _relevancyChecker = new DefaultRelevancyChecker();
369            private StringDistance _stringDistance;
370            private Comparator<SuggestWord> _suggestWordComparator =
371                    SuggestWordQueue.DEFAULT_COMPARATOR;
372    
373    }