001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import com.liferay.portal.kernel.log.Log;
018    import com.liferay.portal.kernel.log.LogFactoryUtil;
019    import com.liferay.portal.kernel.search.BaseSpellCheckIndexWriter;
020    import com.liferay.portal.kernel.search.DictionaryEntry;
021    import com.liferay.portal.kernel.search.DictionaryReader;
022    import com.liferay.portal.kernel.search.DocumentImpl;
023    import com.liferay.portal.kernel.search.NGramHolder;
024    import com.liferay.portal.kernel.search.NGramHolderBuilderUtil;
025    import com.liferay.portal.kernel.search.SearchContext;
026    import com.liferay.portal.kernel.search.SearchException;
027    import com.liferay.portal.kernel.search.SuggestionConstants;
028    import com.liferay.portal.kernel.util.StringPool;
029    import com.liferay.portal.util.PortletKeys;
030    
031    import java.io.IOException;
032    import java.io.InputStream;
033    
034    import java.util.ArrayList;
035    import java.util.Collection;
036    import java.util.Iterator;
037    import java.util.List;
038    import java.util.Map;
039    
040    import org.apache.lucene.document.Document;
041    import org.apache.lucene.document.Field;
042    import org.apache.lucene.index.FieldInfo;
043    import org.apache.lucene.index.IndexReader;
044    import org.apache.lucene.index.Term;
045    import org.apache.lucene.search.IndexSearcher;
046    import org.apache.lucene.util.ReaderUtil;
047    
048    /**
049     * @author Michael C. Han
050     */
051    public class LuceneSpellCheckIndexWriter extends BaseSpellCheckIndexWriter {
052    
053            @Override
054            public void clearQuerySuggestionDictionaryIndexes(
055                            SearchContext searchContext)
056                    throws SearchException {
057    
058                    Term term = new Term(
059                            com.liferay.portal.kernel.search.Field.TYPE,
060                            SuggestionConstants.TYPE_QUERY_SUGGESTION);
061    
062                    try {
063                            LuceneHelperUtil.deleteDocuments(
064                                    searchContext.getCompanyId(), term);
065                    }
066                    catch (IOException e) {
067                            throw new SearchException(e);
068                    }
069            }
070    
071            @Override
072            public void clearSpellCheckerDictionaryIndexes(SearchContext searchContext)
073                    throws SearchException {
074    
075                    Term term = new Term(
076                            com.liferay.portal.kernel.search.Field.TYPE,
077                            SuggestionConstants.TYPE_SPELL_CHECKER);
078    
079                    try {
080                            LuceneHelperUtil.deleteDocuments(
081                                    searchContext.getCompanyId(), term);
082                    }
083                    catch (IOException e) {
084                            throw new SearchException(e);
085                    }
086            }
087    
088            protected void addField(
089                    Document document, String fieldName, String fieldValue,
090                    Field.Store fieldStore, FieldInfo.IndexOptions indexOptions,
091                    boolean omitNorms) {
092    
093                    Field field = new Field(
094                            fieldName, fieldValue, fieldStore, Field.Index.NOT_ANALYZED);
095    
096                    field.setIndexOptions(indexOptions);
097                    field.setOmitNorms(omitNorms);
098    
099                    document.add(field);
100            }
101    
102            protected void addNGramFields(
103                    Document document, Map<String, String> nGrams) {
104    
105                    for (Map.Entry<String, String> entry : nGrams.entrySet()) {
106                            String fieldName = entry.getKey();
107                            String fieldValue = entry.getValue();
108    
109                            addField(
110                                    document, fieldName, fieldValue, Field.Store.NO,
111                                    FieldInfo.IndexOptions.DOCS_ONLY, true);
112                    }
113            }
114    
115            protected Document createDocument(
116                            long companyId, long groupId, String languageId,
117                            String localizedFieldName, String word, float weight,
118                            String typeFieldValue, int maxNGramLength)
119                    throws SearchException {
120    
121                    Document document = new Document();
122    
123                    addField(
124                            document, com.liferay.portal.kernel.search.Field.GROUP_ID,
125                            String.valueOf(groupId), Field.Store.YES,
126                            FieldInfo.IndexOptions.DOCS_ONLY, true);
127                    addField(
128                            document, com.liferay.portal.kernel.search.Field.LANGUAGE_ID,
129                            languageId, Field.Store.YES, FieldInfo.IndexOptions.DOCS_ONLY,
130                            true);
131                    addField(
132                            document, com.liferay.portal.kernel.search.Field.PORTLET_ID,
133                            PortletKeys.SEARCH, Field.Store.YES,
134                            FieldInfo.IndexOptions.DOCS_ONLY, true);
135                    addField(
136                            document, com.liferay.portal.kernel.search.Field.PRIORITY,
137                            String.valueOf(weight), Field.Store.YES,
138                            FieldInfo.IndexOptions.DOCS_ONLY, true);
139                    addField(
140                            document, com.liferay.portal.kernel.search.Field.TYPE,
141                            typeFieldValue, Field.Store.YES, FieldInfo.IndexOptions.DOCS_ONLY,
142                            true);
143                    addField(
144                            document, com.liferay.portal.kernel.search.Field.UID,
145                            getUID(companyId, languageId, word), Field.Store.YES,
146                            FieldInfo.IndexOptions.DOCS_ONLY, true);
147                    addField(
148                            document, localizedFieldName, word, Field.Store.YES,
149                            FieldInfo.IndexOptions.DOCS_ONLY, true);
150    
151                    NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
152                            word, maxNGramLength);
153    
154                    addNGramFields(document, nGramHolder.getNGramEnds());
155    
156                    Map<String, List<String>> nGrams = nGramHolder.getNGrams();
157    
158                    for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
159                            String fieldName = entry.getKey();
160    
161                            for (String nGram : entry.getValue()) {
162                                    addField(
163                                            document, fieldName, nGram, Field.Store.NO,
164                                            FieldInfo.IndexOptions.DOCS_AND_FREQS, false);
165                            }
166                    }
167    
168                    addNGramFields(document, nGramHolder.getNGramStarts());
169    
170                    return document;
171            }
172    
173            @Override
174            protected void indexKeyword(
175                            long companyId, long groupId, String languageId, String keyword,
176                            float weight, String keywordFieldName, String typeFieldValue,
177                            int maxNGramLength)
178                    throws Exception {
179    
180                    IndexAccessor indexAccessor = LuceneHelperUtil.getIndexAccessor(
181                            companyId);
182    
183                    IndexSearcher indexSearcher = null;
184    
185                    try {
186                            List<IndexReader> indexReaders = new ArrayList<IndexReader>();
187    
188                            indexSearcher = LuceneHelperUtil.getIndexSearcher(companyId);
189    
190                            if (indexSearcher.maxDoc() > 0) {
191                                    ReaderUtil.gatherSubReaders(
192                                            indexReaders, indexSearcher.getIndexReader());
193                            }
194    
195                            String localizedFieldName = DocumentImpl.getLocalizedName(
196                                    languageId, keywordFieldName);
197    
198                            boolean validWord = isValidWord(
199                                    localizedFieldName, keyword, indexReaders);
200    
201                            if (!validWord) {
202                                    if (_log.isInfoEnabled()) {
203                                            _log.info(
204                                                    "Not indexing because keyword " + keyword +
205                                                            " is invalid");
206                                    }
207    
208                                    return;
209                            }
210    
211                            Document document = createDocument(
212                                    companyId, groupId, languageId, localizedFieldName, keyword,
213                                    weight, typeFieldValue, maxNGramLength);
214    
215                            indexAccessor.addDocument(document);
216                    }
217                    finally {
218                            try {
219                                    LuceneHelperUtil.releaseIndexSearcher(companyId, indexSearcher);
220                            }
221                            catch (IOException ioe) {
222                                    _log.error("Unable to release searcher", ioe);
223                            }
224                    }
225            }
226    
227            @Override
228            protected void indexKeywords(
229                            long companyId, long groupId, String languageId,
230                            InputStream inputStream, String keywordFieldName,
231                            String typeFieldValue, int maxNGramLength)
232                    throws Exception {
233    
234                    IndexAccessor indexAccessor = LuceneHelperUtil.getIndexAccessor(
235                            companyId);
236    
237                    IndexSearcher indexSearcher = null;
238    
239                    try {
240                            String localizedFieldName = DocumentImpl.getLocalizedName(
241                                    languageId, keywordFieldName);
242    
243                            indexSearcher = LuceneHelperUtil.getIndexSearcher(companyId);
244    
245                            List<IndexReader> indexReaders = new ArrayList<IndexReader>();
246    
247                            if (indexSearcher.maxDoc() > 0) {
248                                    ReaderUtil.gatherSubReaders(
249                                            indexReaders, indexSearcher.getIndexReader());
250                            }
251    
252                            Collection<Document> documents = new ArrayList<Document>();
253    
254                            DictionaryReader dictionaryReader = new DictionaryReader(
255                                    inputStream, StringPool.UTF8);
256    
257                            Iterator<DictionaryEntry> iterator =
258                                    dictionaryReader.getDictionaryEntriesIterator();
259    
260                            while (iterator.hasNext()) {
261                                    DictionaryEntry dictionaryEntry = iterator.next();
262    
263                                    String word = dictionaryEntry.getWord();
264    
265                                    boolean validWord = isValidWord(
266                                            localizedFieldName, word, indexReaders);
267    
268                                    if (!validWord) {
269                                            if (_log.isInfoEnabled()) {
270                                                    _log.info(
271                                                            "Not indexing because word " + word +
272                                                                    " is invalid");
273                                            }
274    
275                                            continue;
276                                    }
277    
278                                    Document document = createDocument(
279                                            companyId, groupId, languageId, localizedFieldName, word,
280                                            dictionaryEntry.getWeight(), typeFieldValue,
281                                            maxNGramLength);
282    
283                                    documents.add(document);
284                            }
285    
286                            indexAccessor.addDocuments(documents);
287                    }
288                    finally {
289                            try {
290                                    LuceneHelperUtil.releaseIndexSearcher(companyId, indexSearcher);
291                            }
292                            catch (IOException ioe) {
293                                    _log.error("Unable to release searcher", ioe);
294                            }
295                    }
296            }
297    
298            protected boolean isValidWord(
299                            String localizedFieldName, String word,
300                            List<IndexReader> indexReaders)
301                    throws IOException {
302    
303                    if (word.length() < _MINIMUM_WORD_LENGTH) {
304                            return false;
305                    }
306    
307                    if (SpellCheckerUtil.isValidWord(
308                                    localizedFieldName, word, indexReaders)) {
309    
310                            return false;
311                    }
312    
313                    return true;
314            }
315    
316            private static final int _MINIMUM_WORD_LENGTH = 3;
317    
318            private static Log _log = LogFactoryUtil.getLog(
319                    LuceneSpellCheckIndexWriter.class);
320    
321    }