001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import com.liferay.portal.kernel.search.BaseSpellCheckIndexWriter;
018    import com.liferay.portal.kernel.search.DictionaryEntry;
019    import com.liferay.portal.kernel.search.DictionaryReader;
020    import com.liferay.portal.kernel.search.DocumentImpl;
021    import com.liferay.portal.kernel.search.NGramHolder;
022    import com.liferay.portal.kernel.search.NGramHolderBuilderUtil;
023    import com.liferay.portal.kernel.search.SearchContext;
024    import com.liferay.portal.kernel.search.SearchException;
025    import com.liferay.portal.kernel.search.SuggestionConstants;
026    import com.liferay.portal.kernel.util.StringPool;
027    import com.liferay.portal.util.PortletKeys;
028    
029    import java.io.IOException;
030    import java.io.InputStream;
031    
032    import java.util.ArrayList;
033    import java.util.Collection;
034    import java.util.Iterator;
035    import java.util.List;
036    import java.util.Map;
037    
038    import org.apache.lucene.document.Document;
039    import org.apache.lucene.document.Field;
040    import org.apache.lucene.index.FieldInfo;
041    import org.apache.lucene.index.IndexReader;
042    import org.apache.lucene.index.Term;
043    import org.apache.lucene.search.IndexSearcher;
044    import org.apache.lucene.util.ReaderUtil;
045    
046    /**
047     * @author Michael C. Han
048     */
049    public class LuceneSpellCheckIndexWriter extends BaseSpellCheckIndexWriter {
050    
051            @Override
052            public void clearQuerySuggestionDictionaryIndexes(
053                            SearchContext searchContext)
054                    throws SearchException {
055    
056                    Term term = new Term(
057                            com.liferay.portal.kernel.search.Field.TYPE,
058                            SuggestionConstants.TYPE_QUERY_SUGGESTION);
059    
060                    try {
061                            LuceneHelperUtil.deleteDocuments(
062                                    searchContext.getCompanyId(), term);
063                    }
064                    catch (IOException e) {
065                            throw new SearchException(e);
066                    }
067            }
068    
069            @Override
070            public void clearSpellCheckerDictionaryIndexes(SearchContext searchContext)
071                    throws SearchException {
072    
073                    Term term = new Term(
074                            com.liferay.portal.kernel.search.Field.TYPE,
075                            SuggestionConstants.TYPE_SPELL_CHECKER);
076    
077                    try {
078                            LuceneHelperUtil.deleteDocuments(
079                                    searchContext.getCompanyId(), term);
080                    }
081                    catch (IOException e) {
082                            throw new SearchException(e);
083                    }
084            }
085    
086            protected void addField(
087                    Document document, String fieldName, String fieldValue,
088                    Field.Store fieldStore, FieldInfo.IndexOptions indexOptions,
089                    boolean omitNorms) {
090    
091                    Field field = new Field(
092                            fieldName, fieldValue, fieldStore, Field.Index.NOT_ANALYZED);
093    
094                    field.setIndexOptions(indexOptions);
095                    field.setOmitNorms(omitNorms);
096    
097                    document.add(field);
098            }
099    
100            protected void addNGramFields(
101                    Document document, Map<String, String> nGrams) {
102    
103                    for (Map.Entry<String, String> entry : nGrams.entrySet()) {
104                            String fieldName = entry.getKey();
105                            String fieldValue = entry.getValue();
106    
107                            addField(
108                                    document, fieldName, fieldValue, Field.Store.NO,
109                                    FieldInfo.IndexOptions.DOCS_ONLY, true);
110                    }
111            }
112    
113            protected Document createDocument(
114                            long companyId, long groupId, String languageId,
115                            String localizedFieldName, String word, float weight,
116                            String typeFieldValue, int maxNGramLength)
117                    throws SearchException {
118    
119                    Document document = new Document();
120    
121                    addField(
122                            document, com.liferay.portal.kernel.search.Field.GROUP_ID,
123                            String.valueOf(groupId), Field.Store.YES,
124                            FieldInfo.IndexOptions.DOCS_ONLY, true);
125                    addField(
126                            document, com.liferay.portal.kernel.search.Field.LANGUAGE_ID,
127                            languageId, Field.Store.YES, FieldInfo.IndexOptions.DOCS_ONLY,
128                            true);
129                    addField(
130                            document, com.liferay.portal.kernel.search.Field.PORTLET_ID,
131                            PortletKeys.SEARCH, Field.Store.YES,
132                            FieldInfo.IndexOptions.DOCS_ONLY, true);
133                    addField(
134                            document, com.liferay.portal.kernel.search.Field.PRIORITY,
135                            String.valueOf(weight), Field.Store.YES,
136                            FieldInfo.IndexOptions.DOCS_ONLY, true);
137                    addField(
138                            document, com.liferay.portal.kernel.search.Field.TYPE,
139                            typeFieldValue, Field.Store.YES, FieldInfo.IndexOptions.DOCS_ONLY,
140                            true);
141                    addField(
142                            document, com.liferay.portal.kernel.search.Field.UID,
143                            getUID(companyId, languageId, word), Field.Store.YES,
144                            FieldInfo.IndexOptions.DOCS_ONLY, true);
145                    addField(
146                            document, localizedFieldName, word, Field.Store.YES,
147                            FieldInfo.IndexOptions.DOCS_ONLY, true);
148    
149                    NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
150                            word, maxNGramLength);
151    
152                    addNGramFields(document, nGramHolder.getNGramEnds());
153    
154                    Map<String, List<String>> nGrams = nGramHolder.getNGrams();
155    
156                    for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
157                            String fieldName = entry.getKey();
158    
159                            for (String nGram : entry.getValue()) {
160                                    addField(
161                                            document, fieldName, nGram, Field.Store.NO,
162                                            FieldInfo.IndexOptions.DOCS_AND_FREQS, false);
163                            }
164                    }
165    
166                    addNGramFields(document, nGramHolder.getNGramStarts());
167    
168                    return document;
169            }
170    
171            @Override
172            protected void indexKeywords(
173                            long companyId, long groupId, String languageId,
174                            InputStream inputStream, String keywordFieldName,
175                            String typeFieldValue, int maxNGramLength)
176                    throws Exception {
177    
178                    IndexAccessor indexAccessor = LuceneHelperUtil.getIndexAccessor(
179                            companyId);
180    
181                    IndexSearcher indexSearcher = null;
182    
183                    try {
184                            String localizedFieldName = DocumentImpl.getLocalizedName(
185                                    languageId, keywordFieldName);
186    
187                            indexSearcher = LuceneHelperUtil.getSearcher(
188                                    indexAccessor.getCompanyId(), true);
189    
190                            List<IndexReader> indexReaders = new ArrayList<IndexReader>();
191    
192                            if (indexSearcher.maxDoc() > 0) {
193                                    ReaderUtil.gatherSubReaders(
194                                            indexReaders, indexSearcher.getIndexReader());
195                            }
196    
197                            Collection<Document> documents = new ArrayList<Document>();
198    
199                            DictionaryReader dictionaryReader = new DictionaryReader(
200                                    inputStream, StringPool.UTF8);
201    
202                            Iterator<DictionaryEntry> iterator =
203                                    dictionaryReader.getDictionaryEntriesIterator();
204    
205                            while (iterator.hasNext()) {
206                                    DictionaryEntry dictionaryEntry = iterator.next();
207    
208                                    String word = dictionaryEntry.getWord();
209    
210                                    boolean validWord = isValidWord(
211                                            localizedFieldName, word, indexReaders);
212    
213                                    if (!validWord) {
214                                            continue;
215                                    }
216    
217                                    Document document = createDocument(
218                                            companyId, groupId, languageId, localizedFieldName, word,
219                                            dictionaryEntry.getWeight(), typeFieldValue,
220                                            maxNGramLength);
221    
222                                    documents.add(document);
223                            }
224    
225                            indexAccessor.addDocuments(documents);
226                    }
227                    finally {
228                            LuceneHelperUtil.cleanUp(indexSearcher);
229                    }
230            }
231    
232            protected boolean isValidWord(
233                            String localizedFieldName, String word,
234                            List<IndexReader> indexReaders)
235                    throws IOException {
236    
237                    if (word.length() < _MINIMUM_WORD_LENGTH) {
238                            return false;
239                    }
240    
241                    if (SpellCheckerUtil.isValidWord(
242                                    localizedFieldName, word, indexReaders)) {
243    
244                            return false;
245                    }
246    
247                    return true;
248            }
249    
250            private static final int _MINIMUM_WORD_LENGTH = 3;
251    
252    }