001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import com.liferay.portal.kernel.search.CollatorUtil;
018    import com.liferay.portal.kernel.search.DocumentImpl;
019    import com.liferay.portal.kernel.search.Field;
020    import com.liferay.portal.kernel.search.NGramHolder;
021    import com.liferay.portal.kernel.search.NGramHolderBuilderUtil;
022    import com.liferay.portal.kernel.search.QuerySuggester;
023    import com.liferay.portal.kernel.search.SearchContext;
024    import com.liferay.portal.kernel.search.SearchException;
025    import com.liferay.portal.kernel.search.TokenizerUtil;
026    import com.liferay.portal.util.PortletKeys;
027    import com.liferay.util.lucene.KeywordsUtil;
028    
029    import java.io.IOException;
030    
031    import java.util.ArrayList;
032    import java.util.Arrays;
033    import java.util.Collections;
034    import java.util.Comparator;
035    import java.util.LinkedHashMap;
036    import java.util.List;
037    import java.util.Map;
038    
039    import org.apache.lucene.document.Document;
040    import org.apache.lucene.document.Fieldable;
041    import org.apache.lucene.index.IndexReader;
042    import org.apache.lucene.index.Term;
043    import org.apache.lucene.queryParser.ParseException;
044    import org.apache.lucene.queryParser.QueryParser;
045    import org.apache.lucene.search.BooleanClause;
046    import org.apache.lucene.search.BooleanQuery;
047    import org.apache.lucene.search.IndexSearcher;
048    import org.apache.lucene.search.Query;
049    import org.apache.lucene.search.ScoreDoc;
050    import org.apache.lucene.search.TermQuery;
051    import org.apache.lucene.search.TopDocs;
052    import org.apache.lucene.search.spell.StringDistance;
053    import org.apache.lucene.search.spell.SuggestWord;
054    import org.apache.lucene.search.spell.SuggestWordQueue;
055    import org.apache.lucene.util.ReaderUtil;
056    
057    /**
058     * @author Michael C. Han
059     */
060    public class LuceneQuerySuggester implements QuerySuggester {
061    
062            public void setBoostEnd(float boostEnd) {
063                    _boostEnd = boostEnd;
064            }
065    
066            public void setBoostStart(float boostStart) {
067                    _boostStart = boostStart;
068            }
069    
070            public void setStringDistance(StringDistance stringDistance) {
071                    _stringDistance = stringDistance;
072            }
073    
074            public void setSuggestWordComparator(
075                    Comparator<SuggestWord> suggestWordComparator) {
076    
077                    _suggestWordComparator = suggestWordComparator;
078            }
079    
080            @Override
081            public String spellCheckKeywords(SearchContext searchContext)
082                    throws SearchException {
083    
084                    String languageId = searchContext.getLanguageId();
085    
086                    String localizedFieldName = DocumentImpl.getLocalizedName(
087                            languageId, Field.SPELL_CHECK_WORD);
088    
089                    List<String> keywords = TokenizerUtil.tokenize(
090                            localizedFieldName, searchContext.getKeywords(), languageId);
091    
092                    Map<String, List<String>> suggestions = spellCheckKeywords(
093                            keywords, localizedFieldName, searchContext, languageId, 1);
094    
095                    return CollatorUtil.collate(suggestions, keywords);
096            }
097    
098            @Override
099            public Map<String, List<String>> spellCheckKeywords(
100                            SearchContext searchContext, int max)
101                    throws SearchException {
102    
103                    String languageId = searchContext.getLanguageId();
104    
105                    String localizedFieldName = DocumentImpl.getLocalizedName(
106                            languageId, Field.SPELL_CHECK_WORD);
107    
108                    List<String> keywords = TokenizerUtil.tokenize(
109                            localizedFieldName, searchContext.getKeywords(), languageId);
110    
111                    return spellCheckKeywords(
112                            keywords, localizedFieldName, searchContext, languageId, max);
113            }
114    
115            @Override
116            public String[] suggestKeywordQueries(SearchContext searchContext, int max)
117                    throws SearchException {
118    
119                    IndexSearcher indexSearcher = null;
120    
121                    try {
122                            indexSearcher = LuceneHelperUtil.getSearcher(
123                                    searchContext.getCompanyId(), true);
124    
125                            BooleanQuery suggestKeywordQuery = new BooleanQuery();
126    
127                            addTermQuery(
128                                    suggestKeywordQuery, Field.COMPANY_ID,
129                                    String.valueOf(searchContext.getCompanyId()), null,
130                                    BooleanClause.Occur.MUST);
131    
132                            String localizedKeywordFieldName = DocumentImpl.getLocalizedName(
133                                    searchContext.getLanguageId(), Field.KEYWORD_SEARCH);
134    
135                            QueryParser queryParser = new QueryParser(
136                                    LuceneHelperUtil.getVersion(), localizedKeywordFieldName,
137                                    LuceneHelperUtil.getAnalyzer());
138    
139                            Query query = null;
140    
141                            try {
142                                    query = queryParser.parse(searchContext.getKeywords());
143                            }
144                            catch (ParseException e) {
145                                    query = queryParser.parse(
146                                            KeywordsUtil.escape(searchContext.getKeywords()));
147                            }
148    
149                            BooleanClause keywordTermQuery = new BooleanClause(
150                                    query, BooleanClause.Occur.MUST);
151    
152                            suggestKeywordQuery.add(keywordTermQuery);
153    
154                            String languageId = searchContext.getLanguageId();
155    
156                            addTermQuery(
157                                    suggestKeywordQuery, Field.LANGUAGE_ID, languageId, null,
158                                    BooleanClause.Occur.MUST);
159                            addTermQuery(
160                                    suggestKeywordQuery, Field.PORTLET_ID, PortletKeys.SEARCH, null,
161                                    BooleanClause.Occur.MUST);
162    
163                            return search(
164                                    indexSearcher, suggestKeywordQuery, localizedKeywordFieldName,
165                                    _relevancyChecker, max);
166                    }
167                    catch (Exception e) {
168                            throw new SearchException("Unable to suggest query", e);
169                    }
170                    finally {
171                            LuceneHelperUtil.cleanUp(indexSearcher);
172                    }
173            }
174    
175            protected void addNGramTermQuery(
176                    BooleanQuery booleanQuery, Map<String, String> nGrams, Float boost,
177                    BooleanClause.Occur occur) {
178    
179                    for (Map.Entry<String, String> nGramEntry : nGrams.entrySet()) {
180                            String name = nGramEntry.getKey();
181                            String value = nGramEntry.getValue();
182    
183                            addTermQuery(booleanQuery, name, value, boost, occur);
184                    }
185            }
186    
187            protected void addTermQuery(
188                    BooleanQuery booleanQuery, String termName, String termValue,
189                    Float boost, BooleanClause.Occur occur) {
190    
191                    Query query = new TermQuery(new Term(termName, termValue));
192    
193                    if (boost != null) {
194                            query.setBoost(boost);
195                    }
196    
197                    BooleanClause booleanClause = new BooleanClause(query, occur);
198    
199                    booleanQuery.add(booleanClause);
200            }
201    
202            protected BooleanQuery buildGroupIdQuery(long[] groupIds) {
203                    BooleanQuery booleanQuery = new BooleanQuery();
204    
205                    addTermQuery(
206                            booleanQuery, Field.GROUP_ID, String.valueOf(0), null,
207                            BooleanClause.Occur.SHOULD);
208    
209                    if ((groupIds != null) && (groupIds.length > 0)) {
210                            for (long groupId : groupIds) {
211                                    addTermQuery(
212                                            booleanQuery, Field.GROUP_ID, String.valueOf(groupId), null,
213                                            BooleanClause.Occur.SHOULD);
214                            }
215                    }
216    
217                    return booleanQuery;
218            }
219    
220            protected BooleanQuery buildNGramQuery(String word) throws SearchException {
221                    NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(word);
222    
223                    BooleanQuery booleanQuery = new BooleanQuery();
224    
225                    if (_boostEnd > 0) {
226                            Map<String, String> nGramEnds = nGramHolder.getNGramEnds();
227    
228                            addNGramTermQuery(
229                                    booleanQuery, nGramEnds, _boostEnd, BooleanClause.Occur.SHOULD);
230                    }
231    
232                    Map<String, List<String>> nGrams = nGramHolder.getNGrams();
233    
234                    for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
235                            String fieldName = entry.getKey();
236    
237                            for (String nGram : entry.getValue()) {
238                                    addTermQuery(
239                                            booleanQuery, fieldName, nGram, null,
240                                            BooleanClause.Occur.SHOULD);
241                            }
242                    }
243    
244                    if (_boostStart > 0) {
245                            Map<String, String> nGramStarts = nGramHolder.getNGramStarts();
246    
247                            addNGramTermQuery(
248                                    booleanQuery, nGramStarts, _boostStart,
249                                    BooleanClause.Occur.SHOULD);
250                    }
251    
252                    return booleanQuery;
253            }
254    
255            protected BooleanQuery buildSpellCheckQuery(
256                            long groupIds[], String word, String languageId)
257                    throws SearchException {
258    
259                    BooleanQuery suggestWordQuery = new BooleanQuery();
260    
261                    BooleanQuery nGramQuery = buildNGramQuery(word);
262    
263                    BooleanClause booleanNGramQueryClause = new BooleanClause(
264                            nGramQuery, BooleanClause.Occur.MUST);
265    
266                    suggestWordQuery.add(booleanNGramQueryClause);
267    
268                    BooleanQuery groupIdQuery = buildGroupIdQuery(groupIds);
269    
270                    BooleanClause groupIdQueryClause = new BooleanClause(
271                            groupIdQuery, BooleanClause.Occur.MUST);
272    
273                    suggestWordQuery.add(groupIdQueryClause);
274    
275                    addTermQuery(
276                            suggestWordQuery, Field.LANGUAGE_ID, languageId, null,
277                            BooleanClause.Occur.MUST);
278                    addTermQuery(
279                            suggestWordQuery, Field.PORTLET_ID, PortletKeys.SEARCH, null,
280                            BooleanClause.Occur.MUST);
281    
282                    return suggestWordQuery;
283            }
284    
285            protected String[] search(
286                            IndexSearcher indexSearcher, Query query, String fieldName,
287                            RelevancyChecker relevancyChecker, int max)
288                    throws IOException {
289    
290                    int maxScoreDocs = max * 10;
291    
292                    TopDocs topDocs = indexSearcher.search(query, null, maxScoreDocs);
293    
294                    ScoreDoc[] scoreDocs = topDocs.scoreDocs;
295    
296                    SuggestWordQueue suggestWordQueue = new SuggestWordQueue(
297                            max, _suggestWordComparator);
298    
299                    int stop = Math.min(scoreDocs.length, maxScoreDocs);
300    
301                    for (int i = 0; i < stop; i++) {
302                            SuggestWord suggestWord = new SuggestWord();
303    
304                            Document document = indexSearcher.doc(scoreDocs[i].doc);
305    
306                            Fieldable fieldable = document.getFieldable(fieldName);
307    
308                            suggestWord.string = fieldable.stringValue();
309    
310                            boolean relevant = relevancyChecker.isRelevant(suggestWord);
311    
312                            if (relevant) {
313                                    suggestWordQueue.insertWithOverflow(suggestWord);
314                            }
315                    }
316    
317                    String[] words = new String[suggestWordQueue.size()];
318    
319                    for (int i = suggestWordQueue.size() - 1; i >= 0; i--) {
320                            SuggestWord suggestWord = suggestWordQueue.pop();
321    
322                            words[i] = suggestWord.string;
323                    }
324    
325                    return words;
326            }
327    
328            protected Map<String, List<String>> spellCheckKeywords(
329                            List<String> keywords, String localizedFieldName,
330                            SearchContext searchContext, String languageId, int max)
331                    throws SearchException {
332    
333                    IndexSearcher indexSearcher = null;
334    
335                    try {
336                            Map<String, List<String>> suggestions =
337                                    new LinkedHashMap<String, List<String>>();
338    
339                            float scoresThreshold = searchContext.getScoresThreshold();
340    
341                            if (scoresThreshold == 0) {
342                                    scoresThreshold = _SCORES_THRESHOLD_DEFAULT;
343                            }
344    
345                            indexSearcher = LuceneHelperUtil.getSearcher(
346                                    searchContext.getCompanyId(), true);
347    
348                            List<IndexReader> indexReaders = new ArrayList<IndexReader>();
349    
350                            if (indexSearcher.maxDoc() > 0) {
351                                    ReaderUtil.gatherSubReaders(
352                                            indexReaders, indexSearcher.getIndexReader());
353                            }
354    
355                            for (String keyword : keywords) {
356                                    List<String> suggestionsList = Collections.emptyList();
357    
358                                    if (!SpellCheckerUtil.isValidWord(
359                                                    localizedFieldName, keyword, indexReaders)) {
360    
361                                            int frequency = indexSearcher.docFreq(
362                                                    new Term(localizedFieldName, keyword));
363    
364                                            String[] suggestionsArray = null;
365    
366                                            if (frequency > 0) {
367                                                    suggestionsArray = new String[] {keyword};
368                                            }
369                                            else {
370                                                    BooleanQuery suggestWordQuery = buildSpellCheckQuery(
371                                                            searchContext.getGroupIds(), keyword, languageId);
372    
373                                                    RelevancyChecker relevancyChecker =
374                                                            new StringDistanceRelevancyChecker(
375                                                                    keyword, scoresThreshold, _stringDistance);
376    
377                                                    suggestionsArray = search(
378                                                            indexSearcher, suggestWordQuery, localizedFieldName,
379                                                            relevancyChecker, max);
380                                            }
381    
382                                            suggestionsList = Arrays.asList(suggestionsArray);
383                                    }
384    
385                                    suggestions.put(keyword, suggestionsList);
386                            }
387    
388                            return suggestions;
389                    }
390                    catch (IOException ioe) {
391                            throw new SearchException("Unable to find suggestions", ioe);
392                    }
393                    finally {
394                            LuceneHelperUtil.cleanUp(indexSearcher);
395                    }
396            }
397    
398            private static final float _SCORES_THRESHOLD_DEFAULT = 0.5f;
399    
400            private float _boostEnd = 1.0f;
401            private float _boostStart = 2.0f;
402            private RelevancyChecker _relevancyChecker = new DefaultRelevancyChecker();
403            private StringDistance _stringDistance;
404            private Comparator<SuggestWord> _suggestWordComparator =
405                    SuggestWordQueue.DEFAULT_COMPARATOR;
406    
407    }