001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import com.liferay.portal.kernel.log.Log;
018    import com.liferay.portal.kernel.log.LogFactoryUtil;
019    import com.liferay.portal.kernel.search.BaseSpellCheckIndexWriter;
020    import com.liferay.portal.kernel.search.DictionaryEntry;
021    import com.liferay.portal.kernel.search.DictionaryReader;
022    import com.liferay.portal.kernel.search.DocumentImpl;
023    import com.liferay.portal.kernel.search.NGramHolder;
024    import com.liferay.portal.kernel.search.NGramHolderBuilderUtil;
025    import com.liferay.portal.kernel.search.SearchContext;
026    import com.liferay.portal.kernel.search.SearchException;
027    import com.liferay.portal.kernel.search.SuggestionConstants;
028    import com.liferay.portal.kernel.util.StringPool;
029    import com.liferay.portal.util.PortletKeys;
030    
031    import java.io.IOException;
032    import java.io.InputStream;
033    
034    import java.util.ArrayList;
035    import java.util.Collection;
036    import java.util.Iterator;
037    import java.util.List;
038    import java.util.Map;
039    
040    import org.apache.lucene.document.Document;
041    import org.apache.lucene.document.Field;
042    import org.apache.lucene.index.FieldInfo;
043    import org.apache.lucene.index.IndexReader;
044    import org.apache.lucene.index.Term;
045    import org.apache.lucene.search.IndexSearcher;
046    import org.apache.lucene.util.ReaderUtil;
047    
048    /**
049     * @author Michael C. Han
050     */
051    public class LuceneSpellCheckIndexWriter extends BaseSpellCheckIndexWriter {
052    
053            @Override
054            public void clearQuerySuggestionDictionaryIndexes(
055                            SearchContext searchContext)
056                    throws SearchException {
057    
058                    Term term = new Term(
059                            com.liferay.portal.kernel.search.Field.TYPE,
060                            SuggestionConstants.TYPE_QUERY_SUGGESTION);
061    
062                    try {
063                            LuceneHelperUtil.deleteDocuments(
064                                    searchContext.getCompanyId(), term);
065                    }
066                    catch (IOException e) {
067                            throw new SearchException(e);
068                    }
069            }
070    
071            @Override
072            public void clearSpellCheckerDictionaryIndexes(SearchContext searchContext)
073                    throws SearchException {
074    
075                    Term term = new Term(
076                            com.liferay.portal.kernel.search.Field.TYPE,
077                            SuggestionConstants.TYPE_SPELL_CHECKER);
078    
079                    try {
080                            LuceneHelperUtil.deleteDocuments(
081                                    searchContext.getCompanyId(), term);
082                    }
083                    catch (IOException e) {
084                            throw new SearchException(e);
085                    }
086            }
087    
088            protected void addField(
089                    Document document, String fieldName, String fieldValue,
090                    Field.Store fieldStore, FieldInfo.IndexOptions indexOptions,
091                    boolean omitNorms) {
092    
093                    Field field = new Field(
094                            fieldName, fieldValue, fieldStore, Field.Index.NOT_ANALYZED);
095    
096                    field.setIndexOptions(indexOptions);
097                    field.setOmitNorms(omitNorms);
098    
099                    document.add(field);
100            }
101    
102            protected void addNGramFields(
103                    Document document, Map<String, String> nGrams) {
104    
105                    for (Map.Entry<String, String> entry : nGrams.entrySet()) {
106                            String fieldName = entry.getKey();
107                            String fieldValue = entry.getValue();
108    
109                            addField(
110                                    document, fieldName, fieldValue, Field.Store.NO,
111                                    FieldInfo.IndexOptions.DOCS_ONLY, true);
112                    }
113            }
114    
115            protected Document createDocument(
116                            long companyId, long groupId, String languageId,
117                            String localizedFieldName, String word, float weight,
118                            String typeFieldValue, int maxNGramLength)
119                    throws SearchException {
120    
121                    Document document = new Document();
122    
123                    addField(
124                            document, com.liferay.portal.kernel.search.Field.GROUP_ID,
125                            String.valueOf(groupId), Field.Store.YES,
126                            FieldInfo.IndexOptions.DOCS_ONLY, true);
127                    addField(
128                            document, com.liferay.portal.kernel.search.Field.LANGUAGE_ID,
129                            languageId, Field.Store.YES, FieldInfo.IndexOptions.DOCS_ONLY,
130                            true);
131                    addField(
132                            document, com.liferay.portal.kernel.search.Field.PORTLET_ID,
133                            PortletKeys.SEARCH, Field.Store.YES,
134                            FieldInfo.IndexOptions.DOCS_ONLY, true);
135                    addField(
136                            document, com.liferay.portal.kernel.search.Field.PRIORITY,
137                            String.valueOf(weight), Field.Store.YES,
138                            FieldInfo.IndexOptions.DOCS_ONLY, true);
139                    addField(
140                            document, com.liferay.portal.kernel.search.Field.TYPE,
141                            typeFieldValue, Field.Store.YES, FieldInfo.IndexOptions.DOCS_ONLY,
142                            true);
143                    addField(
144                            document, com.liferay.portal.kernel.search.Field.UID,
145                            getUID(companyId, languageId, word), Field.Store.YES,
146                            FieldInfo.IndexOptions.DOCS_ONLY, true);
147                    addField(
148                            document, localizedFieldName, word, Field.Store.YES,
149                            FieldInfo.IndexOptions.DOCS_ONLY, true);
150    
151                    NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
152                            word, maxNGramLength);
153    
154                    addNGramFields(document, nGramHolder.getNGramEnds());
155    
156                    Map<String, List<String>> nGrams = nGramHolder.getNGrams();
157    
158                    for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
159                            String fieldName = entry.getKey();
160    
161                            for (String nGram : entry.getValue()) {
162                                    addField(
163                                            document, fieldName, nGram, Field.Store.NO,
164                                            FieldInfo.IndexOptions.DOCS_AND_FREQS, false);
165                            }
166                    }
167    
168                    addNGramFields(document, nGramHolder.getNGramStarts());
169    
170                    return document;
171            }
172    
173            @Override
174            protected void indexKeyword(
175                            long companyId, long groupId, String languageId, String keyword,
176                            float weight, String keywordFieldName, String typeFieldValue,
177                            int maxNGramLength)
178                    throws Exception {
179    
180                    IndexAccessor indexAccessor = LuceneHelperUtil.getIndexAccessor(
181                            companyId);
182    
183                    IndexSearcher indexSearcher = null;
184    
185                    try {
186                            List<IndexReader> indexReaders = new ArrayList<IndexReader>();
187    
188                            indexSearcher = LuceneHelperUtil.getSearcher(
189                                    indexAccessor.getCompanyId(), true);
190    
191                            if (indexSearcher.maxDoc() > 0) {
192                                    ReaderUtil.gatherSubReaders(
193                                            indexReaders, indexSearcher.getIndexReader());
194                            }
195    
196                            String localizedFieldName = DocumentImpl.getLocalizedName(
197                                    languageId, keywordFieldName);
198    
199                            boolean validWord = isValidWord(
200                                    localizedFieldName, keyword, indexReaders);
201    
202                            if (!validWord) {
203                                    if (_log.isInfoEnabled()) {
204                                            _log.info(
205                                                    "Not indexing because keyword " + keyword +
206                                                            " is invalid");
207                                    }
208    
209                                    return;
210                            }
211    
212                            Document document = createDocument(
213                                    companyId, groupId, languageId, localizedFieldName, keyword,
214                                    weight, typeFieldValue, maxNGramLength);
215    
216                            indexAccessor.addDocument(document);
217                    }
218                    finally {
219                            LuceneHelperUtil.cleanUp(indexSearcher);
220                    }
221            }
222    
223            @Override
224            protected void indexKeywords(
225                            long companyId, long groupId, String languageId,
226                            InputStream inputStream, String keywordFieldName,
227                            String typeFieldValue, int maxNGramLength)
228                    throws Exception {
229    
230                    IndexAccessor indexAccessor = LuceneHelperUtil.getIndexAccessor(
231                            companyId);
232    
233                    IndexSearcher indexSearcher = null;
234    
235                    try {
236                            String localizedFieldName = DocumentImpl.getLocalizedName(
237                                    languageId, keywordFieldName);
238    
239                            indexSearcher = LuceneHelperUtil.getSearcher(
240                                    indexAccessor.getCompanyId(), true);
241    
242                            List<IndexReader> indexReaders = new ArrayList<IndexReader>();
243    
244                            if (indexSearcher.maxDoc() > 0) {
245                                    ReaderUtil.gatherSubReaders(
246                                            indexReaders, indexSearcher.getIndexReader());
247                            }
248    
249                            Collection<Document> documents = new ArrayList<Document>();
250    
251                            DictionaryReader dictionaryReader = new DictionaryReader(
252                                    inputStream, StringPool.UTF8);
253    
254                            Iterator<DictionaryEntry> iterator =
255                                    dictionaryReader.getDictionaryEntriesIterator();
256    
257                            while (iterator.hasNext()) {
258                                    DictionaryEntry dictionaryEntry = iterator.next();
259    
260                                    String word = dictionaryEntry.getWord();
261    
262                                    boolean validWord = isValidWord(
263                                            localizedFieldName, word, indexReaders);
264    
265                                    if (!validWord) {
266                                            if (_log.isInfoEnabled()) {
267                                                    _log.info(
268                                                            "Not indexing because word " + word +
269                                                                    " is invalid");
270                                            }
271    
272                                            continue;
273                                    }
274    
275                                    Document document = createDocument(
276                                            companyId, groupId, languageId, localizedFieldName, word,
277                                            dictionaryEntry.getWeight(), typeFieldValue,
278                                            maxNGramLength);
279    
280                                    documents.add(document);
281                            }
282    
283                            indexAccessor.addDocuments(documents);
284                    }
285                    finally {
286                            LuceneHelperUtil.cleanUp(indexSearcher);
287                    }
288            }
289    
290            protected boolean isValidWord(
291                            String localizedFieldName, String word,
292                            List<IndexReader> indexReaders)
293                    throws IOException {
294    
295                    if (word.length() < _MINIMUM_WORD_LENGTH) {
296                            return false;
297                    }
298    
299                    if (SpellCheckerUtil.isValidWord(
300                                    localizedFieldName, word, indexReaders)) {
301    
302                            return false;
303                    }
304    
305                    return true;
306            }
307    
308            private static final int _MINIMUM_WORD_LENGTH = 3;
309    
310            private static Log _log = LogFactoryUtil.getLog(
311                    LuceneSpellCheckIndexWriter.class);
312    
313    }