001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import com.liferay.portal.kernel.search.BaseSpellCheckIndexWriter;
018    import com.liferay.portal.kernel.search.DictionaryEntry;
019    import com.liferay.portal.kernel.search.DictionaryReader;
020    import com.liferay.portal.kernel.search.DocumentImpl;
021    import com.liferay.portal.kernel.search.NGramHolder;
022    import com.liferay.portal.kernel.search.NGramHolderBuilderUtil;
023    import com.liferay.portal.kernel.search.SearchContext;
024    import com.liferay.portal.kernel.search.SearchException;
025    import com.liferay.portal.kernel.util.StringPool;
026    import com.liferay.portal.util.PortletKeys;
027    
028    import java.io.IOException;
029    import java.io.InputStream;
030    
031    import java.util.ArrayList;
032    import java.util.Collection;
033    import java.util.Iterator;
034    import java.util.List;
035    import java.util.Map;
036    
037    import org.apache.lucene.document.Document;
038    import org.apache.lucene.document.Field;
039    import org.apache.lucene.index.FieldInfo;
040    import org.apache.lucene.index.IndexReader;
041    import org.apache.lucene.index.Term;
042    import org.apache.lucene.search.IndexSearcher;
043    import org.apache.lucene.util.ReaderUtil;
044    
045    /**
046     * @author Michael C. Han
047     */
048    public class LuceneSpellCheckIndexWriter extends BaseSpellCheckIndexWriter {
049    
050            @Override
051            public void clearQuerySuggestionDictionaryIndexes(
052                            SearchContext searchContext)
053                    throws SearchException {
054    
055                    Term term = new Term(
056                            com.liferay.portal.kernel.search.Field.TYPE, QUERY_SUGGESTION_TYPE);
057    
058                    try {
059                            LuceneHelperUtil.deleteDocuments(
060                                    searchContext.getCompanyId(), term);
061                    }
062                    catch (IOException e) {
063                            throw new SearchException(e);
064                    }
065            }
066    
067            @Override
068            public void clearSpellCheckerDictionaryIndexes(SearchContext searchContext)
069                    throws SearchException {
070    
071                    Term term = new Term(
072                            com.liferay.portal.kernel.search.Field.TYPE, SPELL_CHECKER_TYPE);
073    
074                    try {
075                            LuceneHelperUtil.deleteDocuments(
076                                    searchContext.getCompanyId(), term);
077                    }
078                    catch (IOException e) {
079                            throw new SearchException(e);
080                    }
081            }
082    
083            protected void addField(
084                    Document document, String fieldName, String fieldValue,
085                    Field.Store fieldStore, FieldInfo.IndexOptions indexOptions,
086                    boolean omitNorms) {
087    
088                    Field field = new Field(
089                            fieldName, fieldValue, fieldStore, Field.Index.NOT_ANALYZED);
090    
091                    field.setIndexOptions(indexOptions);
092                    field.setOmitNorms(omitNorms);
093    
094                    document.add(field);
095            }
096    
097            protected void addNGramFields(
098                    Document document, Map<String, String> nGrams) {
099    
100                    for (Map.Entry<String, String> entry : nGrams.entrySet()) {
101                            String fieldName = entry.getKey();
102                            String fieldValue = entry.getValue();
103    
104                            addField(
105                                    document, fieldName, fieldValue, Field.Store.NO,
106                                    FieldInfo.IndexOptions.DOCS_ONLY, true);
107                    }
108            }
109    
110            protected Document createDocument(
111                            long companyId, long groupId, String languageId,
112                            String localizedFieldName, String word, float weight,
113                            String typeFieldValue, int maxNGramLength)
114                    throws SearchException {
115    
116                    Document document = new Document();
117    
118                    addField(
119                            document, com.liferay.portal.kernel.search.Field.GROUP_ID,
120                            String.valueOf(groupId), Field.Store.YES,
121                            FieldInfo.IndexOptions.DOCS_ONLY, true);
122                    addField(
123                            document, com.liferay.portal.kernel.search.Field.LANGUAGE_ID,
124                            languageId, Field.Store.YES, FieldInfo.IndexOptions.DOCS_ONLY,
125                            true);
126                    addField(
127                            document, com.liferay.portal.kernel.search.Field.PORTLET_ID,
128                            PortletKeys.SEARCH, Field.Store.YES,
129                            FieldInfo.IndexOptions.DOCS_ONLY, true);
130                    addField(
131                            document, com.liferay.portal.kernel.search.Field.PRIORITY,
132                            String.valueOf(weight), Field.Store.YES,
133                            FieldInfo.IndexOptions.DOCS_ONLY, true);
134                    addField(
135                            document, com.liferay.portal.kernel.search.Field.TYPE,
136                            typeFieldValue, Field.Store.YES, FieldInfo.IndexOptions.DOCS_ONLY,
137                            true);
138                    addField(
139                            document, com.liferay.portal.kernel.search.Field.UID,
140                            getUID(companyId, languageId, word), Field.Store.YES,
141                            FieldInfo.IndexOptions.DOCS_ONLY, true);
142                    addField(
143                            document, localizedFieldName, word, Field.Store.YES,
144                            FieldInfo.IndexOptions.DOCS_ONLY, true);
145    
146                    NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
147                            word, maxNGramLength);
148    
149                    addNGramFields(document, nGramHolder.getNGramEnds());
150    
151                    Map<String, List<String>> nGrams = nGramHolder.getNGrams();
152    
153                    for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
154                            String fieldName = entry.getKey();
155    
156                            for (String nGram : entry.getValue()) {
157                                    addField(
158                                            document, fieldName, nGram, Field.Store.NO,
159                                            FieldInfo.IndexOptions.DOCS_AND_FREQS, false);
160                            }
161                    }
162    
163                    addNGramFields(document, nGramHolder.getNGramStarts());
164    
165                    return document;
166            }
167    
168            @Override
169            protected void indexKeywords(
170                            long companyId, long groupId, String languageId,
171                            InputStream inputStream, String keywordFieldName,
172                            String typeFieldValue, int maxNGramLength)
173                    throws Exception {
174    
175                    IndexAccessor indexAccessor = LuceneHelperUtil.getIndexAccessor(
176                            companyId);
177    
178                    IndexSearcher indexSearcher = null;
179    
180                    try {
181                            String localizedFieldName = DocumentImpl.getLocalizedName(
182                                    languageId, keywordFieldName);
183    
184                            indexSearcher = LuceneHelperUtil.getSearcher(
185                                    indexAccessor.getCompanyId(), true);
186    
187                            List<IndexReader> indexReaders = new ArrayList<IndexReader>();
188    
189                            if (indexSearcher.maxDoc() > 0) {
190                                    ReaderUtil.gatherSubReaders(
191                                            indexReaders, indexSearcher.getIndexReader());
192                            }
193    
194                            Collection<Document> documents = new ArrayList<Document>();
195    
196                            DictionaryReader dictionaryReader = new DictionaryReader(
197                                    inputStream, StringPool.UTF8);
198    
199                            Iterator<DictionaryEntry> iterator =
200                                    dictionaryReader.getDictionaryEntriesIterator();
201    
202                            while (iterator.hasNext()) {
203                                    DictionaryEntry dictionaryEntry = iterator.next();
204    
205                                    String word = dictionaryEntry.getWord();
206    
207                                    boolean validWord = isValidWord(
208                                            localizedFieldName, word, indexReaders);
209    
210                                    if (!validWord) {
211                                            continue;
212                                    }
213    
214                                    Document document = createDocument(
215                                            companyId, groupId, languageId, localizedFieldName, word,
216                                            dictionaryEntry.getWeight(), typeFieldValue,
217                                            maxNGramLength);
218    
219                                    documents.add(document);
220                            }
221    
222                            indexAccessor.addDocuments(documents);
223                    }
224                    finally {
225                            LuceneHelperUtil.cleanUp(indexSearcher);
226                    }
227            }
228    
229            protected boolean isValidWord(
230                            String localizedFieldName, String word,
231                            List<IndexReader> indexReaders)
232                    throws IOException {
233    
234                    if (word.length() < _MINIMUM_WORD_LENGTH) {
235                            return false;
236                    }
237    
238                    if (SpellCheckerUtil.isValidWord(
239                                    localizedFieldName, word, indexReaders)) {
240    
241                            return false;
242                    }
243    
244                    return true;
245            }
246    
247            private static final int _MINIMUM_WORD_LENGTH = 3;
248    
249    }