001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.liferay.portal.kernel.log.Log;
018 import com.liferay.portal.kernel.log.LogFactoryUtil;
019 import com.liferay.portal.kernel.search.BaseSpellCheckIndexWriter;
020 import com.liferay.portal.kernel.search.DictionaryEntry;
021 import com.liferay.portal.kernel.search.DictionaryReader;
022 import com.liferay.portal.kernel.search.DocumentImpl;
023 import com.liferay.portal.kernel.search.NGramHolder;
024 import com.liferay.portal.kernel.search.NGramHolderBuilderUtil;
025 import com.liferay.portal.kernel.search.SearchContext;
026 import com.liferay.portal.kernel.search.SearchException;
027 import com.liferay.portal.kernel.search.SuggestionConstants;
028 import com.liferay.portal.kernel.util.StringPool;
029 import com.liferay.portal.util.PortletKeys;
030
031 import java.io.IOException;
032 import java.io.InputStream;
033
034 import java.util.ArrayList;
035 import java.util.Collection;
036 import java.util.Iterator;
037 import java.util.List;
038 import java.util.Map;
039
040 import org.apache.lucene.document.Document;
041 import org.apache.lucene.document.Field;
042 import org.apache.lucene.index.FieldInfo;
043 import org.apache.lucene.index.IndexReader;
044 import org.apache.lucene.index.Term;
045 import org.apache.lucene.search.IndexSearcher;
046 import org.apache.lucene.util.ReaderUtil;
047
048
051 public class LuceneSpellCheckIndexWriter extends BaseSpellCheckIndexWriter {
052
053 @Override
054 public void clearQuerySuggestionDictionaryIndexes(
055 SearchContext searchContext)
056 throws SearchException {
057
058 Term term = new Term(
059 com.liferay.portal.kernel.search.Field.TYPE,
060 SuggestionConstants.TYPE_QUERY_SUGGESTION);
061
062 try {
063 LuceneHelperUtil.deleteDocuments(
064 searchContext.getCompanyId(), term);
065 }
066 catch (IOException e) {
067 throw new SearchException(e);
068 }
069 }
070
071 @Override
072 public void clearSpellCheckerDictionaryIndexes(SearchContext searchContext)
073 throws SearchException {
074
075 Term term = new Term(
076 com.liferay.portal.kernel.search.Field.TYPE,
077 SuggestionConstants.TYPE_SPELL_CHECKER);
078
079 try {
080 LuceneHelperUtil.deleteDocuments(
081 searchContext.getCompanyId(), term);
082 }
083 catch (IOException e) {
084 throw new SearchException(e);
085 }
086 }
087
088 protected void addField(
089 Document document, String fieldName, String fieldValue,
090 Field.Store fieldStore, FieldInfo.IndexOptions indexOptions,
091 boolean omitNorms) {
092
093 Field field = new Field(
094 fieldName, fieldValue, fieldStore, Field.Index.NOT_ANALYZED);
095
096 field.setIndexOptions(indexOptions);
097 field.setOmitNorms(omitNorms);
098
099 document.add(field);
100 }
101
102 protected void addNGramFields(
103 Document document, Map<String, String> nGrams) {
104
105 for (Map.Entry<String, String> entry : nGrams.entrySet()) {
106 String fieldName = entry.getKey();
107 String fieldValue = entry.getValue();
108
109 addField(
110 document, fieldName, fieldValue, Field.Store.NO,
111 FieldInfo.IndexOptions.DOCS_ONLY, true);
112 }
113 }
114
115 protected Document createDocument(
116 long companyId, long groupId, String languageId,
117 String localizedFieldName, String word, float weight,
118 String typeFieldValue, int maxNGramLength)
119 throws SearchException {
120
121 Document document = new Document();
122
123 addField(
124 document, com.liferay.portal.kernel.search.Field.GROUP_ID,
125 String.valueOf(groupId), Field.Store.YES,
126 FieldInfo.IndexOptions.DOCS_ONLY, true);
127 addField(
128 document, com.liferay.portal.kernel.search.Field.LANGUAGE_ID,
129 languageId, Field.Store.YES, FieldInfo.IndexOptions.DOCS_ONLY,
130 true);
131 addField(
132 document, com.liferay.portal.kernel.search.Field.PORTLET_ID,
133 PortletKeys.SEARCH, Field.Store.YES,
134 FieldInfo.IndexOptions.DOCS_ONLY, true);
135 addField(
136 document, com.liferay.portal.kernel.search.Field.PRIORITY,
137 String.valueOf(weight), Field.Store.YES,
138 FieldInfo.IndexOptions.DOCS_ONLY, true);
139 addField(
140 document, com.liferay.portal.kernel.search.Field.TYPE,
141 typeFieldValue, Field.Store.YES, FieldInfo.IndexOptions.DOCS_ONLY,
142 true);
143 addField(
144 document, com.liferay.portal.kernel.search.Field.UID,
145 getUID(companyId, languageId, word), Field.Store.YES,
146 FieldInfo.IndexOptions.DOCS_ONLY, true);
147 addField(
148 document, localizedFieldName, word, Field.Store.YES,
149 FieldInfo.IndexOptions.DOCS_ONLY, true);
150
151 NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
152 word, maxNGramLength);
153
154 addNGramFields(document, nGramHolder.getNGramEnds());
155
156 Map<String, List<String>> nGrams = nGramHolder.getNGrams();
157
158 for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
159 String fieldName = entry.getKey();
160
161 for (String nGram : entry.getValue()) {
162 addField(
163 document, fieldName, nGram, Field.Store.NO,
164 FieldInfo.IndexOptions.DOCS_AND_FREQS, false);
165 }
166 }
167
168 addNGramFields(document, nGramHolder.getNGramStarts());
169
170 return document;
171 }
172
173 @Override
174 protected void indexKeyword(
175 SearchContext searchContext, long groupId, String languageId,
176 String keyword, float weight, String keywordFieldName,
177 String typeFieldValue, int maxNGramLength)
178 throws Exception {
179
180 IndexAccessor indexAccessor = LuceneHelperUtil.getIndexAccessor(
181 searchContext.getCompanyId());
182
183 IndexSearcher indexSearcher = null;
184
185 try {
186 List<IndexReader> indexReaders = new ArrayList<IndexReader>();
187
188 indexSearcher = LuceneHelperUtil.getIndexSearcher(
189 searchContext.getCompanyId());
190
191 if (indexSearcher.maxDoc() > 0) {
192 ReaderUtil.gatherSubReaders(
193 indexReaders, indexSearcher.getIndexReader());
194 }
195
196 String localizedFieldName = DocumentImpl.getLocalizedName(
197 languageId, keywordFieldName);
198
199 boolean validWord = isValidWord(
200 localizedFieldName, keyword, indexReaders);
201
202 if (!validWord) {
203 if (_log.isInfoEnabled()) {
204 _log.info(
205 "Not indexing because keyword " + keyword +
206 " is invalid");
207 }
208
209 return;
210 }
211
212 Document document = createDocument(
213 searchContext.getCompanyId(), groupId, languageId,
214 localizedFieldName, keyword, weight, typeFieldValue,
215 maxNGramLength);
216
217 indexAccessor.addDocument(document);
218 }
219 finally {
220 try {
221 LuceneHelperUtil.releaseIndexSearcher(
222 searchContext.getCompanyId(), indexSearcher);
223 }
224 catch (IOException ioe) {
225 _log.error("Unable to release searcher", ioe);
226 }
227 }
228 }
229
230 @Override
231 protected void indexKeywords(
232 SearchContext searchContext, long groupId, String languageId,
233 InputStream inputStream, String keywordFieldName,
234 String typeFieldValue, int maxNGramLength)
235 throws Exception {
236
237 IndexAccessor indexAccessor = LuceneHelperUtil.getIndexAccessor(
238 searchContext.getCompanyId());
239
240 IndexSearcher indexSearcher = null;
241
242 try {
243 String localizedFieldName = DocumentImpl.getLocalizedName(
244 languageId, keywordFieldName);
245
246 indexSearcher = LuceneHelperUtil.getIndexSearcher(
247 searchContext.getCompanyId());
248
249 List<IndexReader> indexReaders = new ArrayList<IndexReader>();
250
251 if (indexSearcher.maxDoc() > 0) {
252 ReaderUtil.gatherSubReaders(
253 indexReaders, indexSearcher.getIndexReader());
254 }
255
256 Collection<Document> documents = new ArrayList<Document>();
257
258 DictionaryReader dictionaryReader = new DictionaryReader(
259 inputStream, StringPool.UTF8);
260
261 Iterator<DictionaryEntry> iterator =
262 dictionaryReader.getDictionaryEntriesIterator();
263
264 while (iterator.hasNext()) {
265 DictionaryEntry dictionaryEntry = iterator.next();
266
267 String word = dictionaryEntry.getWord();
268
269 boolean validWord = isValidWord(
270 localizedFieldName, word, indexReaders);
271
272 if (!validWord) {
273 if (_log.isInfoEnabled()) {
274 _log.info(
275 "Not indexing because word " + word +
276 " is invalid");
277 }
278
279 continue;
280 }
281
282 Document document = createDocument(
283 searchContext.getCompanyId(), groupId, languageId,
284 localizedFieldName, word, dictionaryEntry.getWeight(),
285 typeFieldValue, maxNGramLength);
286
287 documents.add(document);
288 }
289
290 indexAccessor.addDocuments(documents);
291 }
292 finally {
293 try {
294 LuceneHelperUtil.releaseIndexSearcher(
295 searchContext.getCompanyId(), indexSearcher);
296 }
297 catch (IOException ioe) {
298 _log.error("Unable to release searcher", ioe);
299 }
300 }
301 }
302
303 protected boolean isValidWord(
304 String localizedFieldName, String word,
305 List<IndexReader> indexReaders)
306 throws IOException {
307
308 if (word.length() < _MINIMUM_WORD_LENGTH) {
309 return false;
310 }
311
312 if (SpellCheckerUtil.isValidWord(
313 localizedFieldName, word, indexReaders)) {
314
315 return false;
316 }
317
318 return true;
319 }
320
321 private static final int _MINIMUM_WORD_LENGTH = 3;
322
323 private static final Log _log = LogFactoryUtil.getLog(
324 LuceneSpellCheckIndexWriter.class);
325
326 }