001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.liferay.portal.kernel.log.Log;
018 import com.liferay.portal.kernel.log.LogFactoryUtil;
019 import com.liferay.portal.kernel.search.BaseSpellCheckIndexWriter;
020 import com.liferay.portal.kernel.search.DictionaryEntry;
021 import com.liferay.portal.kernel.search.DictionaryReader;
022 import com.liferay.portal.kernel.search.DocumentImpl;
023 import com.liferay.portal.kernel.search.NGramHolder;
024 import com.liferay.portal.kernel.search.NGramHolderBuilderUtil;
025 import com.liferay.portal.kernel.search.SearchContext;
026 import com.liferay.portal.kernel.search.SearchException;
027 import com.liferay.portal.kernel.search.SuggestionConstants;
028 import com.liferay.portal.kernel.util.StringPool;
029 import com.liferay.portal.util.PortletKeys;
030
031 import java.io.IOException;
032 import java.io.InputStream;
033
034 import java.util.ArrayList;
035 import java.util.Collection;
036 import java.util.Iterator;
037 import java.util.List;
038 import java.util.Map;
039
040 import org.apache.lucene.document.Document;
041 import org.apache.lucene.document.Field;
042 import org.apache.lucene.index.FieldInfo;
043 import org.apache.lucene.index.IndexReader;
044 import org.apache.lucene.index.Term;
045 import org.apache.lucene.search.IndexSearcher;
046 import org.apache.lucene.util.ReaderUtil;
047
048
051 public class LuceneSpellCheckIndexWriter extends BaseSpellCheckIndexWriter {
052
053 @Override
054 public void clearQuerySuggestionDictionaryIndexes(
055 SearchContext searchContext)
056 throws SearchException {
057
058 Term term = new Term(
059 com.liferay.portal.kernel.search.Field.TYPE,
060 SuggestionConstants.TYPE_QUERY_SUGGESTION);
061
062 try {
063 LuceneHelperUtil.deleteDocuments(
064 searchContext.getCompanyId(), term);
065 }
066 catch (IOException e) {
067 throw new SearchException(e);
068 }
069 }
070
071 @Override
072 public void clearSpellCheckerDictionaryIndexes(SearchContext searchContext)
073 throws SearchException {
074
075 Term term = new Term(
076 com.liferay.portal.kernel.search.Field.TYPE,
077 SuggestionConstants.TYPE_SPELL_CHECKER);
078
079 try {
080 LuceneHelperUtil.deleteDocuments(
081 searchContext.getCompanyId(), term);
082 }
083 catch (IOException e) {
084 throw new SearchException(e);
085 }
086 }
087
088 protected void addField(
089 Document document, String fieldName, String fieldValue,
090 Field.Store fieldStore, FieldInfo.IndexOptions indexOptions,
091 boolean omitNorms) {
092
093 Field field = new Field(
094 fieldName, fieldValue, fieldStore, Field.Index.NOT_ANALYZED);
095
096 field.setIndexOptions(indexOptions);
097 field.setOmitNorms(omitNorms);
098
099 document.add(field);
100 }
101
102 protected void addNGramFields(
103 Document document, Map<String, String> nGrams) {
104
105 for (Map.Entry<String, String> entry : nGrams.entrySet()) {
106 String fieldName = entry.getKey();
107 String fieldValue = entry.getValue();
108
109 addField(
110 document, fieldName, fieldValue, Field.Store.NO,
111 FieldInfo.IndexOptions.DOCS_ONLY, true);
112 }
113 }
114
115 protected Document createDocument(
116 long companyId, long groupId, String languageId,
117 String localizedFieldName, String word, float weight,
118 String typeFieldValue, int maxNGramLength)
119 throws SearchException {
120
121 Document document = new Document();
122
123 addField(
124 document, com.liferay.portal.kernel.search.Field.GROUP_ID,
125 String.valueOf(groupId), Field.Store.YES,
126 FieldInfo.IndexOptions.DOCS_ONLY, true);
127 addField(
128 document, com.liferay.portal.kernel.search.Field.LANGUAGE_ID,
129 languageId, Field.Store.YES, FieldInfo.IndexOptions.DOCS_ONLY,
130 true);
131 addField(
132 document, com.liferay.portal.kernel.search.Field.PORTLET_ID,
133 PortletKeys.SEARCH, Field.Store.YES,
134 FieldInfo.IndexOptions.DOCS_ONLY, true);
135 addField(
136 document, com.liferay.portal.kernel.search.Field.PRIORITY,
137 String.valueOf(weight), Field.Store.YES,
138 FieldInfo.IndexOptions.DOCS_ONLY, true);
139 addField(
140 document, com.liferay.portal.kernel.search.Field.TYPE,
141 typeFieldValue, Field.Store.YES, FieldInfo.IndexOptions.DOCS_ONLY,
142 true);
143 addField(
144 document, com.liferay.portal.kernel.search.Field.UID,
145 getUID(companyId, languageId, word), Field.Store.YES,
146 FieldInfo.IndexOptions.DOCS_ONLY, true);
147 addField(
148 document, localizedFieldName, word, Field.Store.YES,
149 FieldInfo.IndexOptions.DOCS_ONLY, true);
150
151 NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
152 word, maxNGramLength);
153
154 addNGramFields(document, nGramHolder.getNGramEnds());
155
156 Map<String, List<String>> nGrams = nGramHolder.getNGrams();
157
158 for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
159 String fieldName = entry.getKey();
160
161 for (String nGram : entry.getValue()) {
162 addField(
163 document, fieldName, nGram, Field.Store.NO,
164 FieldInfo.IndexOptions.DOCS_AND_FREQS, false);
165 }
166 }
167
168 addNGramFields(document, nGramHolder.getNGramStarts());
169
170 return document;
171 }
172
173 @Override
174 protected void indexKeyword(
175 long companyId, long groupId, String languageId, String keyword,
176 float weight, String keywordFieldName, String typeFieldValue,
177 int maxNGramLength)
178 throws Exception {
179
180 IndexAccessor indexAccessor = LuceneHelperUtil.getIndexAccessor(
181 companyId);
182
183 IndexSearcher indexSearcher = null;
184
185 try {
186 List<IndexReader> indexReaders = new ArrayList<IndexReader>();
187
188 indexSearcher = LuceneHelperUtil.getIndexSearcher(companyId);
189
190 if (indexSearcher.maxDoc() > 0) {
191 ReaderUtil.gatherSubReaders(
192 indexReaders, indexSearcher.getIndexReader());
193 }
194
195 String localizedFieldName = DocumentImpl.getLocalizedName(
196 languageId, keywordFieldName);
197
198 boolean validWord = isValidWord(
199 localizedFieldName, keyword, indexReaders);
200
201 if (!validWord) {
202 if (_log.isInfoEnabled()) {
203 _log.info(
204 "Not indexing because keyword " + keyword +
205 " is invalid");
206 }
207
208 return;
209 }
210
211 Document document = createDocument(
212 companyId, groupId, languageId, localizedFieldName, keyword,
213 weight, typeFieldValue, maxNGramLength);
214
215 indexAccessor.addDocument(document);
216 }
217 finally {
218 try {
219 LuceneHelperUtil.releaseIndexSearcher(companyId, indexSearcher);
220 }
221 catch (IOException ioe) {
222 _log.error("Unable to release searcher", ioe);
223 }
224 }
225 }
226
227 @Override
228 protected void indexKeywords(
229 long companyId, long groupId, String languageId,
230 InputStream inputStream, String keywordFieldName,
231 String typeFieldValue, int maxNGramLength)
232 throws Exception {
233
234 IndexAccessor indexAccessor = LuceneHelperUtil.getIndexAccessor(
235 companyId);
236
237 IndexSearcher indexSearcher = null;
238
239 try {
240 String localizedFieldName = DocumentImpl.getLocalizedName(
241 languageId, keywordFieldName);
242
243 indexSearcher = LuceneHelperUtil.getIndexSearcher(companyId);
244
245 List<IndexReader> indexReaders = new ArrayList<IndexReader>();
246
247 if (indexSearcher.maxDoc() > 0) {
248 ReaderUtil.gatherSubReaders(
249 indexReaders, indexSearcher.getIndexReader());
250 }
251
252 Collection<Document> documents = new ArrayList<Document>();
253
254 DictionaryReader dictionaryReader = new DictionaryReader(
255 inputStream, StringPool.UTF8);
256
257 Iterator<DictionaryEntry> iterator =
258 dictionaryReader.getDictionaryEntriesIterator();
259
260 while (iterator.hasNext()) {
261 DictionaryEntry dictionaryEntry = iterator.next();
262
263 String word = dictionaryEntry.getWord();
264
265 boolean validWord = isValidWord(
266 localizedFieldName, word, indexReaders);
267
268 if (!validWord) {
269 if (_log.isInfoEnabled()) {
270 _log.info(
271 "Not indexing because word " + word +
272 " is invalid");
273 }
274
275 continue;
276 }
277
278 Document document = createDocument(
279 companyId, groupId, languageId, localizedFieldName, word,
280 dictionaryEntry.getWeight(), typeFieldValue,
281 maxNGramLength);
282
283 documents.add(document);
284 }
285
286 indexAccessor.addDocuments(documents);
287 }
288 finally {
289 try {
290 LuceneHelperUtil.releaseIndexSearcher(companyId, indexSearcher);
291 }
292 catch (IOException ioe) {
293 _log.error("Unable to release searcher", ioe);
294 }
295 }
296 }
297
298 protected boolean isValidWord(
299 String localizedFieldName, String word,
300 List<IndexReader> indexReaders)
301 throws IOException {
302
303 if (word.length() < _MINIMUM_WORD_LENGTH) {
304 return false;
305 }
306
307 if (SpellCheckerUtil.isValidWord(
308 localizedFieldName, word, indexReaders)) {
309
310 return false;
311 }
312
313 return true;
314 }
315
316 private static final int _MINIMUM_WORD_LENGTH = 3;
317
318 private static Log _log = LogFactoryUtil.getLog(
319 LuceneSpellCheckIndexWriter.class);
320
321 }