001
014
015 package com.liferay.portal.kernel.search;
016
017 import com.liferay.portal.kernel.log.Log;
018 import com.liferay.portal.kernel.log.LogFactoryUtil;
019 import com.liferay.portal.kernel.util.StringPool;
020 import com.liferay.portal.util.PortletKeys;
021
022 import java.io.InputStream;
023
024 import java.util.HashSet;
025 import java.util.Iterator;
026 import java.util.List;
027 import java.util.Map;
028 import java.util.Set;
029
030
033 public abstract class BaseGenericSpellCheckIndexWriter
034 extends BaseSpellCheckIndexWriter {
035
036 public void setBatchSize(int batchSize) {
037 _batchSize = batchSize;
038 }
039
040 public void setDocumentPrototype(Document documentPrototype) {
041 _documentPrototype = documentPrototype;
042 }
043
044 public void setIndexWriter(IndexWriter indexWriter) {
045 _indexWriter = indexWriter;
046 }
047
048 protected void addNGramFields(
049 Document document, Map<String, String> nGrams) {
050
051 for (Map.Entry<String, String> nGramEntry : nGrams.entrySet()) {
052 document.addKeyword(nGramEntry.getKey(), nGramEntry.getValue());
053 }
054 }
055
056 protected Document createDocument(
057 long companyId, long groupId, String languageId, String keywords,
058 float weight, String keywordFieldName, String typeFieldValue,
059 int maxNGramLength)
060 throws SearchException {
061
062 Document document = (Document)_documentPrototype.clone();
063
064 document.addKeyword(Field.COMPANY_ID, companyId);
065 document.addKeyword(Field.GROUP_ID, groupId);
066 document.addKeyword(Field.LANGUAGE_ID, languageId);
067 document.addKeyword(Field.PORTLET_ID, PortletKeys.SEARCH);
068 document.addKeyword(Field.PRIORITY, String.valueOf(weight));
069 document.addKeyword(keywordFieldName, keywords);
070 document.addKeyword(Field.TYPE, typeFieldValue);
071 document.addKeyword(Field.UID, getUID(companyId, languageId, keywords));
072
073 NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
074 keywords, maxNGramLength);
075
076 addNGramFields(document, nGramHolder.getNGramEnds());
077
078 Map<String, List<String>> nGrams = nGramHolder.getNGrams();
079
080 for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
081 String fieldName = entry.getKey();
082
083 for (String nGram : entry.getValue()) {
084 document.addKeyword(fieldName, nGram);
085 }
086 }
087
088 addNGramFields(document, nGramHolder.getNGramStarts());
089
090 return document;
091 }
092
093 @Override
094 protected void indexKeywords(
095 long companyId, long groupId, String languageId,
096 InputStream inputStream, String keywordFieldName,
097 String typeFieldValue, int maxNGramLength)
098 throws Exception {
099
100 Set<Document> documents = new HashSet<Document>();
101
102 try {
103 DictionaryReader dictionaryReader = new DictionaryReader(
104 inputStream, StringPool.UTF8);
105
106 Iterator<DictionaryEntry> iterator =
107 dictionaryReader.getDictionaryEntriesIterator();
108
109 int counter = 0;
110
111 while (iterator.hasNext()) {
112 counter++;
113
114 DictionaryEntry dictionaryEntry = iterator.next();
115
116 Document document = createDocument(
117 companyId, groupId, languageId, dictionaryEntry.getWord(),
118 dictionaryEntry.getWeight(), keywordFieldName,
119 typeFieldValue, maxNGramLength);
120
121 documents.add(document);
122
123 if ((counter == _batchSize) || !iterator.hasNext()) {
124 _indexWriter.addDocuments(null, documents);
125
126 documents.clear();
127
128 counter = 0;
129 }
130 }
131 }
132 catch (Exception e) {
133 if (_log.isWarnEnabled()) {
134 _log.warn("Unable to index dictionaries", e);
135 }
136
137 throw new SearchException(e.getMessage(), e);
138 }
139 }
140
141 private static final int _DEFAULT_BATCH_SIZE = 1000;
142
143 private static Log _log = LogFactoryUtil.getLog(
144 BaseGenericSpellCheckIndexWriter.class);
145
146 private int _batchSize = _DEFAULT_BATCH_SIZE;
147 private Document _documentPrototype = new DocumentImpl();
148 private IndexWriter _indexWriter;
149
150 }