001
014
015 package com.liferay.portal.kernel.search.suggest;
016
import com.liferay.portal.kernel.log.Log;
import com.liferay.portal.kernel.log.LogFactoryUtil;
import com.liferay.portal.kernel.search.Document;
import com.liferay.portal.kernel.search.DocumentImpl;
import com.liferay.portal.kernel.search.Field;
import com.liferay.portal.kernel.search.SearchContext;
import com.liferay.portal.kernel.search.SearchException;
import com.liferay.portal.kernel.util.StringPool;

import java.io.InputStream;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
034
035
038 public abstract class BaseGenericSpellCheckIndexWriter
039 extends BaseSpellCheckIndexWriter {
040
041 public void setBatchSize(int batchSize) {
042 _batchSize = batchSize;
043 }
044
045 public void setDocumentPrototype(Document documentPrototype) {
046 _documentPrototype = documentPrototype;
047 }
048
049 protected abstract void addDocument(
050 String documentType, SearchContext searchContext, Document document)
051 throws SearchException;
052
053 protected abstract void addDocuments(
054 String documentType, SearchContext searchContext,
055 Collection<Document> documents)
056 throws SearchException;
057
058 protected void addNGramFields(
059 Document document, Map<String, String> nGrams) {
060
061 for (Map.Entry<String, String> nGramEntry : nGrams.entrySet()) {
062 document.addKeyword(nGramEntry.getKey(), nGramEntry.getValue());
063 }
064 }
065
066 protected Document createDocument() {
067 return (Document)_documentPrototype.clone();
068 }
069
070 protected Document createDocument(
071 long companyId, long groupId, String languageId, String keywords,
072 float weight, String keywordFieldName, String typeFieldValue,
073 int maxNGramLength)
074 throws SearchException {
075
076 Document document = createDocument();
077
078 document.addKeyword(Field.COMPANY_ID, companyId);
079 document.addKeyword(Field.GROUP_ID, groupId);
080 document.addKeyword(Field.LANGUAGE_ID, languageId);
081 document.addKeyword(Field.PRIORITY, String.valueOf(weight));
082 document.addKeyword(Field.SPELL_CHECK_WORD, true);
083 document.addKeyword(keywordFieldName, keywords);
084 document.addKeyword(Field.TYPE, typeFieldValue);
085 document.addKeyword(Field.UID, getUID(companyId, languageId, keywords));
086
087 NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
088 keywords, maxNGramLength);
089
090 addNGramFields(document, nGramHolder.getNGramEnds());
091
092 Map<String, List<String>> nGrams = nGramHolder.getNGrams();
093
094 for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
095 String fieldName = entry.getKey();
096
097 for (String nGram : entry.getValue()) {
098 document.addKeyword(fieldName, nGram);
099 }
100 }
101
102 addNGramFields(document, nGramHolder.getNGramStarts());
103
104 return document;
105 }
106
107 @Override
108 protected void indexKeyword(
109 SearchContext searchContext, long groupId, String languageId,
110 String keyword, float weight, String keywordFieldName,
111 String typeFieldValue, int maxNGramLength)
112 throws Exception {
113
114 Document document = createDocument(
115 searchContext.getCompanyId(), groupId, languageId, keyword, weight,
116 keywordFieldName, typeFieldValue, maxNGramLength);
117
118 addDocument(typeFieldValue, searchContext, document);
119 }
120
121 @Override
122 protected void indexKeywords(
123 SearchContext searchContext, long groupId, String languageId,
124 InputStream inputStream, String keywordFieldName,
125 String typeFieldValue, int maxNGramLength)
126 throws Exception {
127
128 Set<Document> documents = new HashSet<>();
129
130 try {
131 DictionaryReader dictionaryReader = new DictionaryReader(
132 inputStream, StringPool.UTF8);
133
134 Iterator<DictionaryEntry> iterator =
135 dictionaryReader.getDictionaryEntriesIterator();
136
137 int counter = 0;
138
139 while (iterator.hasNext()) {
140 counter++;
141
142 DictionaryEntry dictionaryEntry = iterator.next();
143
144 Document document = createDocument(
145 searchContext.getCompanyId(), groupId, languageId,
146 dictionaryEntry.getWord(), dictionaryEntry.getWeight(),
147 keywordFieldName, typeFieldValue, maxNGramLength);
148
149 documents.add(document);
150
151 if ((counter == _batchSize) || !iterator.hasNext()) {
152 addDocuments(typeFieldValue, searchContext, documents);
153
154 documents.clear();
155
156 counter = 0;
157 }
158 }
159 }
160 catch (Exception e) {
161 if (_log.isWarnEnabled()) {
162 _log.warn("Unable to index dictionaries", e);
163 }
164
165 throw new SearchException(e.getMessage(), e);
166 }
167 }
168
169 private static final int _DEFAULT_BATCH_SIZE = 1000;
170
171 private static final Log _log = LogFactoryUtil.getLog(
172 BaseGenericSpellCheckIndexWriter.class);
173
174 private int _batchSize = _DEFAULT_BATCH_SIZE;
175 private Document _documentPrototype = new DocumentImpl();
176
177 }