001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import com.liferay.portal.kernel.search.BaseQuerySuggester;
018    import com.liferay.portal.kernel.search.DocumentImpl;
019    import com.liferay.portal.kernel.search.Field;
020    import com.liferay.portal.kernel.search.NGramHolder;
021    import com.liferay.portal.kernel.search.NGramHolderBuilderUtil;
022    import com.liferay.portal.kernel.search.SearchContext;
023    import com.liferay.portal.kernel.search.SearchException;
024    import com.liferay.portal.kernel.search.TokenizerUtil;
025    import com.liferay.portal.kernel.util.ArrayUtil;
026    import com.liferay.portal.util.PortletKeys;
027    import com.liferay.util.lucene.KeywordsUtil;
028    
029    import java.io.IOException;
030    
031    import java.util.ArrayList;
032    import java.util.Arrays;
033    import java.util.Collections;
034    import java.util.Comparator;
035    import java.util.LinkedHashMap;
036    import java.util.List;
037    import java.util.Map;
038    
039    import org.apache.lucene.document.Document;
040    import org.apache.lucene.document.Fieldable;
041    import org.apache.lucene.index.IndexReader;
042    import org.apache.lucene.index.Term;
043    import org.apache.lucene.queryParser.ParseException;
044    import org.apache.lucene.queryParser.QueryParser;
045    import org.apache.lucene.search.BooleanClause;
046    import org.apache.lucene.search.BooleanQuery;
047    import org.apache.lucene.search.IndexSearcher;
048    import org.apache.lucene.search.Query;
049    import org.apache.lucene.search.ScoreDoc;
050    import org.apache.lucene.search.TermQuery;
051    import org.apache.lucene.search.TopDocs;
052    import org.apache.lucene.search.spell.StringDistance;
053    import org.apache.lucene.search.spell.SuggestWord;
054    import org.apache.lucene.search.spell.SuggestWordQueue;
055    import org.apache.lucene.util.ReaderUtil;
056    
057    /**
058     * @author Michael C. Han
059     */
060    public class LuceneQuerySuggester extends BaseQuerySuggester {
061    
062            public void setBoostEnd(float boostEnd) {
063                    _boostEnd = boostEnd;
064            }
065    
066            public void setBoostStart(float boostStart) {
067                    _boostStart = boostStart;
068            }
069    
070            public void setStringDistance(StringDistance stringDistance) {
071                    _stringDistance = stringDistance;
072            }
073    
074            public void setSuggestWordComparator(
075                    Comparator<SuggestWord> suggestWordComparator) {
076    
077                    _suggestWordComparator = suggestWordComparator;
078            }
079    
080            @Override
081            public Map<String, List<String>> spellCheckKeywords(
082                            SearchContext searchContext, int max)
083                    throws SearchException {
084    
085                    String languageId = searchContext.getLanguageId();
086    
087                    String localizedFieldName = DocumentImpl.getLocalizedName(
088                            languageId, Field.SPELL_CHECK_WORD);
089    
090                    List<String> keywords = TokenizerUtil.tokenize(
091                            localizedFieldName, searchContext.getKeywords(), languageId);
092    
093                    return spellCheckKeywords(
094                            keywords, localizedFieldName, searchContext, languageId, max);
095            }
096    
097            @Override
098            public String[] suggestKeywordQueries(SearchContext searchContext, int max)
099                    throws SearchException {
100    
101                    IndexSearcher indexSearcher = null;
102    
103                    try {
104                            indexSearcher = LuceneHelperUtil.getSearcher(
105                                    searchContext.getCompanyId(), true);
106    
107                            BooleanQuery suggestKeywordQuery = new BooleanQuery();
108    
109                            addTermQuery(
110                                    suggestKeywordQuery, Field.COMPANY_ID,
111                                    String.valueOf(searchContext.getCompanyId()), null,
112                                    BooleanClause.Occur.MUST);
113    
114                            String localizedKeywordFieldName = DocumentImpl.getLocalizedName(
115                                    searchContext.getLanguageId(), Field.KEYWORD_SEARCH);
116    
117                            QueryParser queryParser = new QueryParser(
118                                    LuceneHelperUtil.getVersion(), localizedKeywordFieldName,
119                                    LuceneHelperUtil.getAnalyzer());
120    
121                            Query query = null;
122    
123                            try {
124                                    query = queryParser.parse(searchContext.getKeywords());
125                            }
126                            catch (ParseException e) {
127                                    query = queryParser.parse(
128                                            KeywordsUtil.escape(searchContext.getKeywords()));
129                            }
130    
131                            BooleanClause keywordTermQuery = new BooleanClause(
132                                    query, BooleanClause.Occur.MUST);
133    
134                            suggestKeywordQuery.add(keywordTermQuery);
135    
136                            String languageId = searchContext.getLanguageId();
137    
138                            addTermQuery(
139                                    suggestKeywordQuery, Field.LANGUAGE_ID, languageId, null,
140                                    BooleanClause.Occur.MUST);
141                            addTermQuery(
142                                    suggestKeywordQuery, Field.PORTLET_ID, PortletKeys.SEARCH, null,
143                                    BooleanClause.Occur.MUST);
144    
145                            return search(
146                                    indexSearcher, suggestKeywordQuery, localizedKeywordFieldName,
147                                    _relevancyChecker, max);
148                    }
149                    catch (Exception e) {
150                            throw new SearchException("Unable to suggest query", e);
151                    }
152                    finally {
153                            LuceneHelperUtil.cleanUp(indexSearcher);
154                    }
155            }
156    
157            protected void addNGramTermQuery(
158                    BooleanQuery booleanQuery, Map<String, String> nGrams, Float boost,
159                    BooleanClause.Occur occur) {
160    
161                    for (Map.Entry<String, String> nGramEntry : nGrams.entrySet()) {
162                            String name = nGramEntry.getKey();
163                            String value = nGramEntry.getValue();
164    
165                            addTermQuery(booleanQuery, name, value, boost, occur);
166                    }
167            }
168    
169            protected void addTermQuery(
170                    BooleanQuery booleanQuery, String termName, String termValue,
171                    Float boost, BooleanClause.Occur occur) {
172    
173                    Query query = new TermQuery(new Term(termName, termValue));
174    
175                    if (boost != null) {
176                            query.setBoost(boost);
177                    }
178    
179                    BooleanClause booleanClause = new BooleanClause(query, occur);
180    
181                    booleanQuery.add(booleanClause);
182            }
183    
184            protected BooleanQuery buildGroupIdQuery(long[] groupIds) {
185                    BooleanQuery booleanQuery = new BooleanQuery();
186    
187                    addTermQuery(
188                            booleanQuery, Field.GROUP_ID, String.valueOf(0), null,
189                            BooleanClause.Occur.SHOULD);
190    
191                    if (ArrayUtil.isNotEmpty(groupIds)) {
192                            for (long groupId : groupIds) {
193                                    addTermQuery(
194                                            booleanQuery, Field.GROUP_ID, String.valueOf(groupId), null,
195                                            BooleanClause.Occur.SHOULD);
196                            }
197                    }
198    
199                    return booleanQuery;
200            }
201    
202            protected BooleanQuery buildNGramQuery(String word) throws SearchException {
203                    NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(word);
204    
205                    BooleanQuery booleanQuery = new BooleanQuery();
206    
207                    if (_boostEnd > 0) {
208                            Map<String, String> nGramEnds = nGramHolder.getNGramEnds();
209    
210                            addNGramTermQuery(
211                                    booleanQuery, nGramEnds, _boostEnd, BooleanClause.Occur.SHOULD);
212                    }
213    
214                    Map<String, List<String>> nGrams = nGramHolder.getNGrams();
215    
216                    for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
217                            String fieldName = entry.getKey();
218    
219                            for (String nGram : entry.getValue()) {
220                                    addTermQuery(
221                                            booleanQuery, fieldName, nGram, null,
222                                            BooleanClause.Occur.SHOULD);
223                            }
224                    }
225    
226                    if (_boostStart > 0) {
227                            Map<String, String> nGramStarts = nGramHolder.getNGramStarts();
228    
229                            addNGramTermQuery(
230                                    booleanQuery, nGramStarts, _boostStart,
231                                    BooleanClause.Occur.SHOULD);
232                    }
233    
234                    return booleanQuery;
235            }
236    
237            protected BooleanQuery buildSpellCheckQuery(
238                            long groupIds[], String word, String languageId)
239                    throws SearchException {
240    
241                    BooleanQuery suggestWordQuery = new BooleanQuery();
242    
243                    BooleanQuery nGramQuery = buildNGramQuery(word);
244    
245                    BooleanClause booleanNGramQueryClause = new BooleanClause(
246                            nGramQuery, BooleanClause.Occur.MUST);
247    
248                    suggestWordQuery.add(booleanNGramQueryClause);
249    
250                    BooleanQuery groupIdQuery = buildGroupIdQuery(groupIds);
251    
252                    BooleanClause groupIdQueryClause = new BooleanClause(
253                            groupIdQuery, BooleanClause.Occur.MUST);
254    
255                    suggestWordQuery.add(groupIdQueryClause);
256    
257                    addTermQuery(
258                            suggestWordQuery, Field.LANGUAGE_ID, languageId, null,
259                            BooleanClause.Occur.MUST);
260                    addTermQuery(
261                            suggestWordQuery, Field.PORTLET_ID, PortletKeys.SEARCH, null,
262                            BooleanClause.Occur.MUST);
263    
264                    return suggestWordQuery;
265            }
266    
267            protected String[] search(
268                            IndexSearcher indexSearcher, Query query, String fieldName,
269                            RelevancyChecker relevancyChecker, int max)
270                    throws IOException {
271    
272                    int maxScoreDocs = max * 10;
273    
274                    TopDocs topDocs = indexSearcher.search(query, null, maxScoreDocs);
275    
276                    ScoreDoc[] scoreDocs = topDocs.scoreDocs;
277    
278                    SuggestWordQueue suggestWordQueue = new SuggestWordQueue(
279                            max, _suggestWordComparator);
280    
281                    int stop = Math.min(scoreDocs.length, maxScoreDocs);
282    
283                    for (int i = 0; i < stop; i++) {
284                            SuggestWord suggestWord = new SuggestWord();
285    
286                            Document document = indexSearcher.doc(scoreDocs[i].doc);
287    
288                            Fieldable fieldable = document.getFieldable(fieldName);
289    
290                            suggestWord.string = fieldable.stringValue();
291    
292                            boolean relevant = relevancyChecker.isRelevant(suggestWord);
293    
294                            if (relevant) {
295                                    suggestWordQueue.insertWithOverflow(suggestWord);
296                            }
297                    }
298    
299                    String[] words = new String[suggestWordQueue.size()];
300    
301                    for (int i = suggestWordQueue.size() - 1; i >= 0; i--) {
302                            SuggestWord suggestWord = suggestWordQueue.pop();
303    
304                            words[i] = suggestWord.string;
305                    }
306    
307                    return words;
308            }
309    
310            protected Map<String, List<String>> spellCheckKeywords(
311                            List<String> keywords, String localizedFieldName,
312                            SearchContext searchContext, String languageId, int max)
313                    throws SearchException {
314    
315                    IndexSearcher indexSearcher = null;
316    
317                    try {
318                            Map<String, List<String>> suggestions =
319                                    new LinkedHashMap<String, List<String>>();
320    
321                            float scoresThreshold = searchContext.getScoresThreshold();
322    
323                            if (scoresThreshold == 0) {
324                                    scoresThreshold = _SCORES_THRESHOLD_DEFAULT;
325                            }
326    
327                            indexSearcher = LuceneHelperUtil.getSearcher(
328                                    searchContext.getCompanyId(), true);
329    
330                            List<IndexReader> indexReaders = new ArrayList<IndexReader>();
331    
332                            if (indexSearcher.maxDoc() > 0) {
333                                    ReaderUtil.gatherSubReaders(
334                                            indexReaders, indexSearcher.getIndexReader());
335                            }
336    
337                            for (String keyword : keywords) {
338                                    List<String> suggestionsList = Collections.emptyList();
339    
340                                    if (!SpellCheckerUtil.isValidWord(
341                                                    localizedFieldName, keyword, indexReaders)) {
342    
343                                            int frequency = indexSearcher.docFreq(
344                                                    new Term(localizedFieldName, keyword));
345    
346                                            String[] suggestionsArray = null;
347    
348                                            if (frequency > 0) {
349                                                    suggestionsArray = new String[] {keyword};
350                                            }
351                                            else {
352                                                    BooleanQuery suggestWordQuery = buildSpellCheckQuery(
353                                                            searchContext.getGroupIds(), keyword, languageId);
354    
355                                                    RelevancyChecker relevancyChecker =
356                                                            new StringDistanceRelevancyChecker(
357                                                                    keyword, scoresThreshold, _stringDistance);
358    
359                                                    suggestionsArray = search(
360                                                            indexSearcher, suggestWordQuery, localizedFieldName,
361                                                            relevancyChecker, max);
362                                            }
363    
364                                            suggestionsList = Arrays.asList(suggestionsArray);
365                                    }
366    
367                                    suggestions.put(keyword, suggestionsList);
368                            }
369    
370                            return suggestions;
371                    }
372                    catch (IOException ioe) {
373                            throw new SearchException("Unable to find suggestions", ioe);
374                    }
375                    finally {
376                            LuceneHelperUtil.cleanUp(indexSearcher);
377                    }
378            }
379    
380            private static final float _SCORES_THRESHOLD_DEFAULT = 0.5f;
381    
382            private float _boostEnd = 1.0f;
383            private float _boostStart = 2.0f;
384            private RelevancyChecker _relevancyChecker = new DefaultRelevancyChecker();
385            private StringDistance _stringDistance;
386            private Comparator<SuggestWord> _suggestWordComparator =
387                    SuggestWordQueue.DEFAULT_COMPARATOR;
388    
389    }