001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.liferay.portal.kernel.search.BaseSpellCheckIndexWriter;
018 import com.liferay.portal.kernel.search.DictionaryEntry;
019 import com.liferay.portal.kernel.search.DictionaryReader;
020 import com.liferay.portal.kernel.search.DocumentImpl;
021 import com.liferay.portal.kernel.search.NGramHolder;
022 import com.liferay.portal.kernel.search.NGramHolderBuilderUtil;
023 import com.liferay.portal.kernel.search.SearchContext;
024 import com.liferay.portal.kernel.search.SearchException;
025 import com.liferay.portal.kernel.search.SuggestionConstants;
026 import com.liferay.portal.kernel.util.StringPool;
027 import com.liferay.portal.util.PortletKeys;
028
029 import java.io.IOException;
030 import java.io.InputStream;
031
032 import java.util.ArrayList;
033 import java.util.Collection;
034 import java.util.Iterator;
035 import java.util.List;
036 import java.util.Map;
037
038 import org.apache.lucene.document.Document;
039 import org.apache.lucene.document.Field;
040 import org.apache.lucene.index.FieldInfo;
041 import org.apache.lucene.index.IndexReader;
042 import org.apache.lucene.index.Term;
043 import org.apache.lucene.search.IndexSearcher;
044 import org.apache.lucene.util.ReaderUtil;
045
046
049 public class LuceneSpellCheckIndexWriter extends BaseSpellCheckIndexWriter {
050
051 @Override
052 public void clearQuerySuggestionDictionaryIndexes(
053 SearchContext searchContext)
054 throws SearchException {
055
056 Term term = new Term(
057 com.liferay.portal.kernel.search.Field.TYPE,
058 SuggestionConstants.QUERY_SUGGESTION_TYPE);
059
060 try {
061 LuceneHelperUtil.deleteDocuments(
062 searchContext.getCompanyId(), term);
063 }
064 catch (IOException e) {
065 throw new SearchException(e);
066 }
067 }
068
069 @Override
070 public void clearSpellCheckerDictionaryIndexes(SearchContext searchContext)
071 throws SearchException {
072
073 Term term = new Term(
074 com.liferay.portal.kernel.search.Field.TYPE,
075 SuggestionConstants.SPELL_CHECKER_TYPE);
076
077 try {
078 LuceneHelperUtil.deleteDocuments(
079 searchContext.getCompanyId(), term);
080 }
081 catch (IOException e) {
082 throw new SearchException(e);
083 }
084 }
085
086 protected void addField(
087 Document document, String fieldName, String fieldValue,
088 Field.Store fieldStore, FieldInfo.IndexOptions indexOptions,
089 boolean omitNorms) {
090
091 Field field = new Field(
092 fieldName, fieldValue, fieldStore, Field.Index.NOT_ANALYZED);
093
094 field.setIndexOptions(indexOptions);
095 field.setOmitNorms(omitNorms);
096
097 document.add(field);
098 }
099
100 protected void addNGramFields(
101 Document document, Map<String, String> nGrams) {
102
103 for (Map.Entry<String, String> entry : nGrams.entrySet()) {
104 String fieldName = entry.getKey();
105 String fieldValue = entry.getValue();
106
107 addField(
108 document, fieldName, fieldValue, Field.Store.NO,
109 FieldInfo.IndexOptions.DOCS_ONLY, true);
110 }
111 }
112
113 protected Document createDocument(
114 long companyId, long groupId, String languageId,
115 String localizedFieldName, String word, float weight,
116 String typeFieldValue, int maxNGramLength)
117 throws SearchException {
118
119 Document document = new Document();
120
121 addField(
122 document, com.liferay.portal.kernel.search.Field.GROUP_ID,
123 String.valueOf(groupId), Field.Store.YES,
124 FieldInfo.IndexOptions.DOCS_ONLY, true);
125 addField(
126 document, com.liferay.portal.kernel.search.Field.LANGUAGE_ID,
127 languageId, Field.Store.YES, FieldInfo.IndexOptions.DOCS_ONLY,
128 true);
129 addField(
130 document, com.liferay.portal.kernel.search.Field.PORTLET_ID,
131 PortletKeys.SEARCH, Field.Store.YES,
132 FieldInfo.IndexOptions.DOCS_ONLY, true);
133 addField(
134 document, com.liferay.portal.kernel.search.Field.PRIORITY,
135 String.valueOf(weight), Field.Store.YES,
136 FieldInfo.IndexOptions.DOCS_ONLY, true);
137 addField(
138 document, com.liferay.portal.kernel.search.Field.TYPE,
139 typeFieldValue, Field.Store.YES, FieldInfo.IndexOptions.DOCS_ONLY,
140 true);
141 addField(
142 document, com.liferay.portal.kernel.search.Field.UID,
143 getUID(companyId, languageId, word), Field.Store.YES,
144 FieldInfo.IndexOptions.DOCS_ONLY, true);
145 addField(
146 document, localizedFieldName, word, Field.Store.YES,
147 FieldInfo.IndexOptions.DOCS_ONLY, true);
148
149 NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
150 word, maxNGramLength);
151
152 addNGramFields(document, nGramHolder.getNGramEnds());
153
154 Map<String, List<String>> nGrams = nGramHolder.getNGrams();
155
156 for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
157 String fieldName = entry.getKey();
158
159 for (String nGram : entry.getValue()) {
160 addField(
161 document, fieldName, nGram, Field.Store.NO,
162 FieldInfo.IndexOptions.DOCS_AND_FREQS, false);
163 }
164 }
165
166 addNGramFields(document, nGramHolder.getNGramStarts());
167
168 return document;
169 }
170
171 @Override
172 protected void indexKeywords(
173 long companyId, long groupId, String languageId,
174 InputStream inputStream, String keywordFieldName,
175 String typeFieldValue, int maxNGramLength)
176 throws Exception {
177
178 IndexAccessor indexAccessor = LuceneHelperUtil.getIndexAccessor(
179 companyId);
180
181 IndexSearcher indexSearcher = null;
182
183 try {
184 String localizedFieldName = DocumentImpl.getLocalizedName(
185 languageId, keywordFieldName);
186
187 indexSearcher = LuceneHelperUtil.getSearcher(
188 indexAccessor.getCompanyId(), true);
189
190 List<IndexReader> indexReaders = new ArrayList<IndexReader>();
191
192 if (indexSearcher.maxDoc() > 0) {
193 ReaderUtil.gatherSubReaders(
194 indexReaders, indexSearcher.getIndexReader());
195 }
196
197 Collection<Document> documents = new ArrayList<Document>();
198
199 DictionaryReader dictionaryReader = new DictionaryReader(
200 inputStream, StringPool.UTF8);
201
202 Iterator<DictionaryEntry> iterator =
203 dictionaryReader.getDictionaryEntriesIterator();
204
205 while (iterator.hasNext()) {
206 DictionaryEntry dictionaryEntry = iterator.next();
207
208 String word = dictionaryEntry.getWord();
209
210 boolean validWord = isValidWord(
211 localizedFieldName, word, indexReaders);
212
213 if (!validWord) {
214 continue;
215 }
216
217 Document document = createDocument(
218 companyId, groupId, languageId, localizedFieldName, word,
219 dictionaryEntry.getWeight(), typeFieldValue,
220 maxNGramLength);
221
222 documents.add(document);
223 }
224
225 indexAccessor.addDocuments(documents);
226 }
227 finally {
228 LuceneHelperUtil.cleanUp(indexSearcher);
229 }
230 }
231
232 protected boolean isValidWord(
233 String localizedFieldName, String word,
234 List<IndexReader> indexReaders)
235 throws IOException {
236
237 if (word.length() < _MINIMUM_WORD_LENGTH) {
238 return false;
239 }
240
241 if (SpellCheckerUtil.isValidWord(
242 localizedFieldName, word, indexReaders)) {
243
244 return false;
245 }
246
247 return true;
248 }
249
250 private static final int _MINIMUM_WORD_LENGTH = 3;
251
252 }