001    /**
002     * Copyright (c) 2000-present Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import com.liferay.portal.kernel.log.Log;
018    import com.liferay.portal.kernel.log.LogFactoryUtil;
019    import com.liferay.portal.kernel.search.BaseSpellCheckIndexWriter;
020    import com.liferay.portal.kernel.search.DictionaryEntry;
021    import com.liferay.portal.kernel.search.DictionaryReader;
022    import com.liferay.portal.kernel.search.DocumentImpl;
023    import com.liferay.portal.kernel.search.NGramHolder;
024    import com.liferay.portal.kernel.search.NGramHolderBuilderUtil;
025    import com.liferay.portal.kernel.search.SearchContext;
026    import com.liferay.portal.kernel.search.SearchException;
027    import com.liferay.portal.kernel.search.SuggestionConstants;
028    import com.liferay.portal.kernel.util.StringPool;
029    import com.liferay.portal.util.PortletKeys;
030    
031    import java.io.IOException;
032    import java.io.InputStream;
033    
034    import java.util.ArrayList;
035    import java.util.Collection;
036    import java.util.Iterator;
037    import java.util.List;
038    import java.util.Map;
039    
040    import org.apache.lucene.document.Document;
041    import org.apache.lucene.document.Field;
042    import org.apache.lucene.index.FieldInfo;
043    import org.apache.lucene.index.IndexReader;
044    import org.apache.lucene.index.Term;
045    import org.apache.lucene.search.IndexSearcher;
046    import org.apache.lucene.util.ReaderUtil;
047    
048    /**
049     * @author Michael C. Han
050     */
051    public class LuceneSpellCheckIndexWriter extends BaseSpellCheckIndexWriter {
052    
053            @Override
054            public void clearQuerySuggestionDictionaryIndexes(
055                            SearchContext searchContext)
056                    throws SearchException {
057    
058                    Term term = new Term(
059                            com.liferay.portal.kernel.search.Field.TYPE,
060                            SuggestionConstants.TYPE_QUERY_SUGGESTION);
061    
062                    try {
063                            LuceneHelperUtil.deleteDocuments(
064                                    searchContext.getCompanyId(), term);
065                    }
066                    catch (IOException e) {
067                            throw new SearchException(e);
068                    }
069            }
070    
071            @Override
072            public void clearSpellCheckerDictionaryIndexes(SearchContext searchContext)
073                    throws SearchException {
074    
075                    Term term = new Term(
076                            com.liferay.portal.kernel.search.Field.TYPE,
077                            SuggestionConstants.TYPE_SPELL_CHECKER);
078    
079                    try {
080                            LuceneHelperUtil.deleteDocuments(
081                                    searchContext.getCompanyId(), term);
082                    }
083                    catch (IOException e) {
084                            throw new SearchException(e);
085                    }
086            }
087    
088            protected void addField(
089                    Document document, String fieldName, String fieldValue,
090                    Field.Store fieldStore, FieldInfo.IndexOptions indexOptions,
091                    boolean omitNorms) {
092    
093                    Field field = new Field(
094                            fieldName, fieldValue, fieldStore, Field.Index.NOT_ANALYZED);
095    
096                    field.setIndexOptions(indexOptions);
097                    field.setOmitNorms(omitNorms);
098    
099                    document.add(field);
100            }
101    
102            protected void addNGramFields(
103                    Document document, Map<String, String> nGrams) {
104    
105                    for (Map.Entry<String, String> entry : nGrams.entrySet()) {
106                            String fieldName = entry.getKey();
107                            String fieldValue = entry.getValue();
108    
109                            addField(
110                                    document, fieldName, fieldValue, Field.Store.NO,
111                                    FieldInfo.IndexOptions.DOCS_ONLY, true);
112                    }
113            }
114    
115            protected Document createDocument(
116                            long companyId, long groupId, String languageId,
117                            String localizedFieldName, String word, float weight,
118                            String typeFieldValue, int maxNGramLength)
119                    throws SearchException {
120    
121                    Document document = new Document();
122    
123                    addField(
124                            document, com.liferay.portal.kernel.search.Field.GROUP_ID,
125                            String.valueOf(groupId), Field.Store.YES,
126                            FieldInfo.IndexOptions.DOCS_ONLY, true);
127                    addField(
128                            document, com.liferay.portal.kernel.search.Field.LANGUAGE_ID,
129                            languageId, Field.Store.YES, FieldInfo.IndexOptions.DOCS_ONLY,
130                            true);
131                    addField(
132                            document, com.liferay.portal.kernel.search.Field.PORTLET_ID,
133                            PortletKeys.SEARCH, Field.Store.YES,
134                            FieldInfo.IndexOptions.DOCS_ONLY, true);
135                    addField(
136                            document, com.liferay.portal.kernel.search.Field.PRIORITY,
137                            String.valueOf(weight), Field.Store.YES,
138                            FieldInfo.IndexOptions.DOCS_ONLY, true);
139                    addField(
140                            document, com.liferay.portal.kernel.search.Field.TYPE,
141                            typeFieldValue, Field.Store.YES, FieldInfo.IndexOptions.DOCS_ONLY,
142                            true);
143                    addField(
144                            document, com.liferay.portal.kernel.search.Field.UID,
145                            getUID(companyId, languageId, word), Field.Store.YES,
146                            FieldInfo.IndexOptions.DOCS_ONLY, true);
147                    addField(
148                            document, localizedFieldName, word, Field.Store.YES,
149                            FieldInfo.IndexOptions.DOCS_ONLY, true);
150    
151                    NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
152                            word, maxNGramLength);
153    
154                    addNGramFields(document, nGramHolder.getNGramEnds());
155    
156                    Map<String, List<String>> nGrams = nGramHolder.getNGrams();
157    
158                    for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
159                            String fieldName = entry.getKey();
160    
161                            for (String nGram : entry.getValue()) {
162                                    addField(
163                                            document, fieldName, nGram, Field.Store.NO,
164                                            FieldInfo.IndexOptions.DOCS_AND_FREQS, false);
165                            }
166                    }
167    
168                    addNGramFields(document, nGramHolder.getNGramStarts());
169    
170                    return document;
171            }
172    
173            @Override
174            protected void indexKeyword(
175                            SearchContext searchContext, long groupId, String languageId,
176                            String keyword, float weight, String keywordFieldName,
177                            String typeFieldValue, int maxNGramLength)
178                    throws Exception {
179    
180                    IndexAccessor indexAccessor = LuceneHelperUtil.getIndexAccessor(
181                            searchContext.getCompanyId());
182    
183                    IndexSearcher indexSearcher = null;
184    
185                    try {
186                            List<IndexReader> indexReaders = new ArrayList<IndexReader>();
187    
188                            indexSearcher = LuceneHelperUtil.getIndexSearcher(
189                                    searchContext.getCompanyId());
190    
191                            if (indexSearcher.maxDoc() > 0) {
192                                    ReaderUtil.gatherSubReaders(
193                                            indexReaders, indexSearcher.getIndexReader());
194                            }
195    
196                            String localizedFieldName = DocumentImpl.getLocalizedName(
197                                    languageId, keywordFieldName);
198    
199                            boolean validWord = isValidWord(
200                                    localizedFieldName, keyword, indexReaders);
201    
202                            if (!validWord) {
203                                    if (_log.isInfoEnabled()) {
204                                            _log.info(
205                                                    "Not indexing because keyword " + keyword +
206                                                            " is invalid");
207                                    }
208    
209                                    return;
210                            }
211    
212                            Document document = createDocument(
213                                    searchContext.getCompanyId(), groupId, languageId,
214                                    localizedFieldName, keyword, weight, typeFieldValue,
215                                    maxNGramLength);
216    
217                            indexAccessor.addDocument(document);
218                    }
219                    finally {
220                            try {
221                                    LuceneHelperUtil.releaseIndexSearcher(
222                                            searchContext.getCompanyId(), indexSearcher);
223                            }
224                            catch (IOException ioe) {
225                                    _log.error("Unable to release searcher", ioe);
226                            }
227                    }
228            }
229    
230            @Override
231            protected void indexKeywords(
232                            SearchContext searchContext, long groupId, String languageId,
233                            InputStream inputStream, String keywordFieldName,
234                            String typeFieldValue, int maxNGramLength)
235                    throws Exception {
236    
237                    IndexAccessor indexAccessor = LuceneHelperUtil.getIndexAccessor(
238                            searchContext.getCompanyId());
239    
240                    IndexSearcher indexSearcher = null;
241    
242                    try {
243                            String localizedFieldName = DocumentImpl.getLocalizedName(
244                                    languageId, keywordFieldName);
245    
246                            indexSearcher = LuceneHelperUtil.getIndexSearcher(
247                                    searchContext.getCompanyId());
248    
249                            List<IndexReader> indexReaders = new ArrayList<IndexReader>();
250    
251                            if (indexSearcher.maxDoc() > 0) {
252                                    ReaderUtil.gatherSubReaders(
253                                            indexReaders, indexSearcher.getIndexReader());
254                            }
255    
256                            Collection<Document> documents = new ArrayList<Document>();
257    
258                            DictionaryReader dictionaryReader = new DictionaryReader(
259                                    inputStream, StringPool.UTF8);
260    
261                            Iterator<DictionaryEntry> iterator =
262                                    dictionaryReader.getDictionaryEntriesIterator();
263    
264                            while (iterator.hasNext()) {
265                                    DictionaryEntry dictionaryEntry = iterator.next();
266    
267                                    String word = dictionaryEntry.getWord();
268    
269                                    boolean validWord = isValidWord(
270                                            localizedFieldName, word, indexReaders);
271    
272                                    if (!validWord) {
273                                            if (_log.isInfoEnabled()) {
274                                                    _log.info(
275                                                            "Not indexing because word " + word +
276                                                                    " is invalid");
277                                            }
278    
279                                            continue;
280                                    }
281    
282                                    Document document = createDocument(
283                                            searchContext.getCompanyId(), groupId, languageId,
284                                            localizedFieldName, word, dictionaryEntry.getWeight(),
285                                            typeFieldValue, maxNGramLength);
286    
287                                    documents.add(document);
288                            }
289    
290                            indexAccessor.addDocuments(documents);
291                    }
292                    finally {
293                            try {
294                                    LuceneHelperUtil.releaseIndexSearcher(
295                                            searchContext.getCompanyId(), indexSearcher);
296                            }
297                            catch (IOException ioe) {
298                                    _log.error("Unable to release searcher", ioe);
299                            }
300                    }
301            }
302    
303            protected boolean isValidWord(
304                            String localizedFieldName, String word,
305                            List<IndexReader> indexReaders)
306                    throws IOException {
307    
308                    if (word.length() < _MINIMUM_WORD_LENGTH) {
309                            return false;
310                    }
311    
312                    if (SpellCheckerUtil.isValidWord(
313                                    localizedFieldName, word, indexReaders)) {
314    
315                            return false;
316                    }
317    
318                    return true;
319            }
320    
321            private static final int _MINIMUM_WORD_LENGTH = 3;
322    
323            private static final Log _log = LogFactoryUtil.getLog(
324                    LuceneSpellCheckIndexWriter.class);
325    
326    }