001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.liferay.portal.kernel.log.Log;
018 import com.liferay.portal.kernel.log.LogFactoryUtil;
019 import com.liferay.portal.kernel.search.BaseSpellCheckIndexWriter;
020 import com.liferay.portal.kernel.search.DictionaryEntry;
021 import com.liferay.portal.kernel.search.DictionaryReader;
022 import com.liferay.portal.kernel.search.DocumentImpl;
023 import com.liferay.portal.kernel.search.NGramHolder;
024 import com.liferay.portal.kernel.search.NGramHolderBuilderUtil;
025 import com.liferay.portal.kernel.search.SearchContext;
026 import com.liferay.portal.kernel.search.SearchException;
027 import com.liferay.portal.kernel.search.SuggestionConstants;
028 import com.liferay.portal.kernel.util.StringPool;
029 import com.liferay.portal.util.PortletKeys;
030
031 import java.io.IOException;
032 import java.io.InputStream;
033
034 import java.util.ArrayList;
035 import java.util.Collection;
036 import java.util.Iterator;
037 import java.util.List;
038 import java.util.Map;
039
040 import org.apache.lucene.document.Document;
041 import org.apache.lucene.document.Field;
042 import org.apache.lucene.index.FieldInfo;
043 import org.apache.lucene.index.IndexReader;
044 import org.apache.lucene.index.Term;
045 import org.apache.lucene.search.IndexSearcher;
046 import org.apache.lucene.util.ReaderUtil;
047
048
051 public class LuceneSpellCheckIndexWriter extends BaseSpellCheckIndexWriter {
052
053 @Override
054 public void clearQuerySuggestionDictionaryIndexes(
055 SearchContext searchContext)
056 throws SearchException {
057
058 Term term = new Term(
059 com.liferay.portal.kernel.search.Field.TYPE,
060 SuggestionConstants.TYPE_QUERY_SUGGESTION);
061
062 try {
063 LuceneHelperUtil.deleteDocuments(
064 searchContext.getCompanyId(), term);
065 }
066 catch (IOException e) {
067 throw new SearchException(e);
068 }
069 }
070
071 @Override
072 public void clearSpellCheckerDictionaryIndexes(SearchContext searchContext)
073 throws SearchException {
074
075 Term term = new Term(
076 com.liferay.portal.kernel.search.Field.TYPE,
077 SuggestionConstants.TYPE_SPELL_CHECKER);
078
079 try {
080 LuceneHelperUtil.deleteDocuments(
081 searchContext.getCompanyId(), term);
082 }
083 catch (IOException e) {
084 throw new SearchException(e);
085 }
086 }
087
088 protected void addField(
089 Document document, String fieldName, String fieldValue,
090 Field.Store fieldStore, FieldInfo.IndexOptions indexOptions,
091 boolean omitNorms) {
092
093 Field field = new Field(
094 fieldName, fieldValue, fieldStore, Field.Index.NOT_ANALYZED);
095
096 field.setIndexOptions(indexOptions);
097 field.setOmitNorms(omitNorms);
098
099 document.add(field);
100 }
101
102 protected void addNGramFields(
103 Document document, Map<String, String> nGrams) {
104
105 for (Map.Entry<String, String> entry : nGrams.entrySet()) {
106 String fieldName = entry.getKey();
107 String fieldValue = entry.getValue();
108
109 addField(
110 document, fieldName, fieldValue, Field.Store.NO,
111 FieldInfo.IndexOptions.DOCS_ONLY, true);
112 }
113 }
114
115 protected Document createDocument(
116 long companyId, long groupId, String languageId,
117 String localizedFieldName, String word, float weight,
118 String typeFieldValue, int maxNGramLength)
119 throws SearchException {
120
121 Document document = new Document();
122
123 addField(
124 document, com.liferay.portal.kernel.search.Field.GROUP_ID,
125 String.valueOf(groupId), Field.Store.YES,
126 FieldInfo.IndexOptions.DOCS_ONLY, true);
127 addField(
128 document, com.liferay.portal.kernel.search.Field.LANGUAGE_ID,
129 languageId, Field.Store.YES, FieldInfo.IndexOptions.DOCS_ONLY,
130 true);
131 addField(
132 document, com.liferay.portal.kernel.search.Field.PORTLET_ID,
133 PortletKeys.SEARCH, Field.Store.YES,
134 FieldInfo.IndexOptions.DOCS_ONLY, true);
135 addField(
136 document, com.liferay.portal.kernel.search.Field.PRIORITY,
137 String.valueOf(weight), Field.Store.YES,
138 FieldInfo.IndexOptions.DOCS_ONLY, true);
139 addField(
140 document, com.liferay.portal.kernel.search.Field.TYPE,
141 typeFieldValue, Field.Store.YES, FieldInfo.IndexOptions.DOCS_ONLY,
142 true);
143 addField(
144 document, com.liferay.portal.kernel.search.Field.UID,
145 getUID(companyId, languageId, word), Field.Store.YES,
146 FieldInfo.IndexOptions.DOCS_ONLY, true);
147 addField(
148 document, localizedFieldName, word, Field.Store.YES,
149 FieldInfo.IndexOptions.DOCS_ONLY, true);
150
151 NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
152 word, maxNGramLength);
153
154 addNGramFields(document, nGramHolder.getNGramEnds());
155
156 Map<String, List<String>> nGrams = nGramHolder.getNGrams();
157
158 for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
159 String fieldName = entry.getKey();
160
161 for (String nGram : entry.getValue()) {
162 addField(
163 document, fieldName, nGram, Field.Store.NO,
164 FieldInfo.IndexOptions.DOCS_AND_FREQS, false);
165 }
166 }
167
168 addNGramFields(document, nGramHolder.getNGramStarts());
169
170 return document;
171 }
172
173 @Override
174 protected void indexKeyword(
175 long companyId, long groupId, String languageId, String keyword,
176 float weight, String keywordFieldName, String typeFieldValue,
177 int maxNGramLength)
178 throws Exception {
179
180 IndexAccessor indexAccessor = LuceneHelperUtil.getIndexAccessor(
181 companyId);
182
183 IndexSearcher indexSearcher = null;
184
185 try {
186 List<IndexReader> indexReaders = new ArrayList<IndexReader>();
187
188 indexSearcher = LuceneHelperUtil.getSearcher(
189 indexAccessor.getCompanyId(), true);
190
191 if (indexSearcher.maxDoc() > 0) {
192 ReaderUtil.gatherSubReaders(
193 indexReaders, indexSearcher.getIndexReader());
194 }
195
196 String localizedFieldName = DocumentImpl.getLocalizedName(
197 languageId, keywordFieldName);
198
199 boolean validWord = isValidWord(
200 localizedFieldName, keyword, indexReaders);
201
202 if (!validWord) {
203 if (_log.isInfoEnabled()) {
204 _log.info(
205 "Not indexing because keyword " + keyword +
206 " is invalid");
207 }
208
209 return;
210 }
211
212 Document document = createDocument(
213 companyId, groupId, languageId, localizedFieldName, keyword,
214 weight, typeFieldValue, maxNGramLength);
215
216 indexAccessor.addDocument(document);
217 }
218 finally {
219 LuceneHelperUtil.cleanUp(indexSearcher);
220 }
221 }
222
223 @Override
224 protected void indexKeywords(
225 long companyId, long groupId, String languageId,
226 InputStream inputStream, String keywordFieldName,
227 String typeFieldValue, int maxNGramLength)
228 throws Exception {
229
230 IndexAccessor indexAccessor = LuceneHelperUtil.getIndexAccessor(
231 companyId);
232
233 IndexSearcher indexSearcher = null;
234
235 try {
236 String localizedFieldName = DocumentImpl.getLocalizedName(
237 languageId, keywordFieldName);
238
239 indexSearcher = LuceneHelperUtil.getSearcher(
240 indexAccessor.getCompanyId(), true);
241
242 List<IndexReader> indexReaders = new ArrayList<IndexReader>();
243
244 if (indexSearcher.maxDoc() > 0) {
245 ReaderUtil.gatherSubReaders(
246 indexReaders, indexSearcher.getIndexReader());
247 }
248
249 Collection<Document> documents = new ArrayList<Document>();
250
251 DictionaryReader dictionaryReader = new DictionaryReader(
252 inputStream, StringPool.UTF8);
253
254 Iterator<DictionaryEntry> iterator =
255 dictionaryReader.getDictionaryEntriesIterator();
256
257 while (iterator.hasNext()) {
258 DictionaryEntry dictionaryEntry = iterator.next();
259
260 String word = dictionaryEntry.getWord();
261
262 boolean validWord = isValidWord(
263 localizedFieldName, word, indexReaders);
264
265 if (!validWord) {
266 if (_log.isInfoEnabled()) {
267 _log.info(
268 "Not indexing because word " + word +
269 " is invalid");
270 }
271
272 continue;
273 }
274
275 Document document = createDocument(
276 companyId, groupId, languageId, localizedFieldName, word,
277 dictionaryEntry.getWeight(), typeFieldValue,
278 maxNGramLength);
279
280 documents.add(document);
281 }
282
283 indexAccessor.addDocuments(documents);
284 }
285 finally {
286 LuceneHelperUtil.cleanUp(indexSearcher);
287 }
288 }
289
290 protected boolean isValidWord(
291 String localizedFieldName, String word,
292 List<IndexReader> indexReaders)
293 throws IOException {
294
295 if (word.length() < _MINIMUM_WORD_LENGTH) {
296 return false;
297 }
298
299 if (SpellCheckerUtil.isValidWord(
300 localizedFieldName, word, indexReaders)) {
301
302 return false;
303 }
304
305 return true;
306 }
307
308 private static final int _MINIMUM_WORD_LENGTH = 3;
309
310 private static Log _log = LogFactoryUtil.getLog(
311 LuceneSpellCheckIndexWriter.class);
312
313 }