001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.liferay.portal.kernel.search.BaseSpellCheckIndexWriter;
018 import com.liferay.portal.kernel.search.DictionaryEntry;
019 import com.liferay.portal.kernel.search.DictionaryReader;
020 import com.liferay.portal.kernel.search.DocumentImpl;
021 import com.liferay.portal.kernel.search.NGramHolder;
022 import com.liferay.portal.kernel.search.NGramHolderBuilderUtil;
023 import com.liferay.portal.kernel.search.SearchContext;
024 import com.liferay.portal.kernel.search.SearchException;
025 import com.liferay.portal.kernel.util.StringPool;
026 import com.liferay.portal.util.PortletKeys;
027
028 import java.io.IOException;
029 import java.io.InputStream;
030
031 import java.util.ArrayList;
032 import java.util.Collection;
033 import java.util.Iterator;
034 import java.util.List;
035 import java.util.Map;
036
037 import org.apache.lucene.document.Document;
038 import org.apache.lucene.document.Field;
039 import org.apache.lucene.index.FieldInfo;
040 import org.apache.lucene.index.IndexReader;
041 import org.apache.lucene.index.Term;
042 import org.apache.lucene.search.IndexSearcher;
043 import org.apache.lucene.util.ReaderUtil;
044
045
048 public class LuceneSpellCheckIndexWriter extends BaseSpellCheckIndexWriter {
049
050 @Override
051 public void clearQuerySuggestionDictionaryIndexes(
052 SearchContext searchContext)
053 throws SearchException {
054
055 Term term = new Term(
056 com.liferay.portal.kernel.search.Field.TYPE, QUERY_SUGGESTION_TYPE);
057
058 try {
059 LuceneHelperUtil.deleteDocuments(
060 searchContext.getCompanyId(), term);
061 }
062 catch (IOException e) {
063 throw new SearchException(e);
064 }
065 }
066
067 @Override
068 public void clearSpellCheckerDictionaryIndexes(SearchContext searchContext)
069 throws SearchException {
070
071 Term term = new Term(
072 com.liferay.portal.kernel.search.Field.TYPE, SPELL_CHECKER_TYPE);
073
074 try {
075 LuceneHelperUtil.deleteDocuments(
076 searchContext.getCompanyId(), term);
077 }
078 catch (IOException e) {
079 throw new SearchException(e);
080 }
081 }
082
083 protected void addField(
084 Document document, String fieldName, String fieldValue,
085 Field.Store fieldStore, FieldInfo.IndexOptions indexOptions,
086 boolean omitNorms) {
087
088 Field field = new Field(
089 fieldName, fieldValue, fieldStore, Field.Index.NOT_ANALYZED);
090
091 field.setIndexOptions(indexOptions);
092 field.setOmitNorms(omitNorms);
093
094 document.add(field);
095 }
096
097 protected void addNGramFields(
098 Document document, Map<String, String> nGrams) {
099
100 for (Map.Entry<String, String> entry : nGrams.entrySet()) {
101 String fieldName = entry.getKey();
102 String fieldValue = entry.getValue();
103
104 addField(
105 document, fieldName, fieldValue, Field.Store.NO,
106 FieldInfo.IndexOptions.DOCS_ONLY, true);
107 }
108 }
109
110 protected Document createDocument(
111 long companyId, long groupId, String languageId,
112 String localizedFieldName, String word, float weight,
113 String typeFieldValue, int maxNGramLength)
114 throws SearchException {
115
116 Document document = new Document();
117
118 addField(
119 document, com.liferay.portal.kernel.search.Field.GROUP_ID,
120 String.valueOf(groupId), Field.Store.YES,
121 FieldInfo.IndexOptions.DOCS_ONLY, true);
122 addField(
123 document, com.liferay.portal.kernel.search.Field.LANGUAGE_ID,
124 languageId, Field.Store.YES, FieldInfo.IndexOptions.DOCS_ONLY,
125 true);
126 addField(
127 document, com.liferay.portal.kernel.search.Field.PORTLET_ID,
128 PortletKeys.SEARCH, Field.Store.YES,
129 FieldInfo.IndexOptions.DOCS_ONLY, true);
130 addField(
131 document, com.liferay.portal.kernel.search.Field.PRIORITY,
132 String.valueOf(weight), Field.Store.YES,
133 FieldInfo.IndexOptions.DOCS_ONLY, true);
134 addField(
135 document, com.liferay.portal.kernel.search.Field.TYPE,
136 typeFieldValue, Field.Store.YES, FieldInfo.IndexOptions.DOCS_ONLY,
137 true);
138 addField(
139 document, com.liferay.portal.kernel.search.Field.UID,
140 getUID(companyId, languageId, word), Field.Store.YES,
141 FieldInfo.IndexOptions.DOCS_ONLY, true);
142 addField(
143 document, localizedFieldName, word, Field.Store.YES,
144 FieldInfo.IndexOptions.DOCS_ONLY, true);
145
146 NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
147 word, maxNGramLength);
148
149 addNGramFields(document, nGramHolder.getNGramEnds());
150
151 Map<String, List<String>> nGrams = nGramHolder.getNGrams();
152
153 for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
154 String fieldName = entry.getKey();
155
156 for (String nGram : entry.getValue()) {
157 addField(
158 document, fieldName, nGram, Field.Store.NO,
159 FieldInfo.IndexOptions.DOCS_AND_FREQS, false);
160 }
161 }
162
163 addNGramFields(document, nGramHolder.getNGramStarts());
164
165 return document;
166 }
167
168 @Override
169 protected void indexKeywords(
170 long companyId, long groupId, String languageId,
171 InputStream inputStream, String keywordFieldName,
172 String typeFieldValue, int maxNGramLength)
173 throws Exception {
174
175 IndexAccessor indexAccessor = LuceneHelperUtil.getIndexAccessor(
176 companyId);
177
178 IndexSearcher indexSearcher = null;
179
180 try {
181 String localizedFieldName = DocumentImpl.getLocalizedName(
182 languageId, keywordFieldName);
183
184 indexSearcher = LuceneHelperUtil.getSearcher(
185 indexAccessor.getCompanyId(), true);
186
187 List<IndexReader> indexReaders = new ArrayList<IndexReader>();
188
189 if (indexSearcher.maxDoc() > 0) {
190 ReaderUtil.gatherSubReaders(
191 indexReaders, indexSearcher.getIndexReader());
192 }
193
194 Collection<Document> documents = new ArrayList<Document>();
195
196 DictionaryReader dictionaryReader = new DictionaryReader(
197 inputStream, StringPool.UTF8);
198
199 Iterator<DictionaryEntry> iterator =
200 dictionaryReader.getDictionaryEntriesIterator();
201
202 while (iterator.hasNext()) {
203 DictionaryEntry dictionaryEntry = iterator.next();
204
205 String word = dictionaryEntry.getWord();
206
207 boolean validWord = isValidWord(
208 localizedFieldName, word, indexReaders);
209
210 if (!validWord) {
211 continue;
212 }
213
214 Document document = createDocument(
215 companyId, groupId, languageId, localizedFieldName, word,
216 dictionaryEntry.getWeight(), typeFieldValue,
217 maxNGramLength);
218
219 documents.add(document);
220 }
221
222 indexAccessor.addDocuments(documents);
223 }
224 finally {
225 LuceneHelperUtil.cleanUp(indexSearcher);
226 }
227 }
228
229 protected boolean isValidWord(
230 String localizedFieldName, String word,
231 List<IndexReader> indexReaders)
232 throws IOException {
233
234 if (word.length() < _MINIMUM_WORD_LENGTH) {
235 return false;
236 }
237
238 if (SpellCheckerUtil.isValidWord(
239 localizedFieldName, word, indexReaders)) {
240
241 return false;
242 }
243
244 return true;
245 }
246
247 private static final int _MINIMUM_WORD_LENGTH = 3;
248
249 }