001    /**
002     * Copyright (c) 2000-present Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.kernel.search.suggest;
016    
017    import com.liferay.portal.kernel.log.Log;
018    import com.liferay.portal.kernel.log.LogFactoryUtil;
019    import com.liferay.portal.kernel.search.Document;
020    import com.liferay.portal.kernel.search.DocumentImpl;
021    import com.liferay.portal.kernel.search.Field;
022    import com.liferay.portal.kernel.search.SearchContext;
023    import com.liferay.portal.kernel.search.SearchException;
024    import com.liferay.portal.kernel.util.StringPool;
025    
026    import java.io.InputStream;
027    
028    import java.util.Collection;
029    import java.util.HashSet;
030    import java.util.Iterator;
031    import java.util.List;
032    import java.util.Map;
033    import java.util.Set;
034    
035    /**
036     * @author Michael C. Han
037     */
038    public abstract class BaseGenericSpellCheckIndexWriter
039            extends BaseSpellCheckIndexWriter {
040    
041            public void setBatchSize(int batchSize) {
042                    _batchSize = batchSize;
043            }
044    
045            public void setDocumentPrototype(Document documentPrototype) {
046                    _documentPrototype = documentPrototype;
047            }
048    
049            protected abstract void addDocument(
050                            String documentType, SearchContext searchContext, Document document)
051                    throws SearchException;
052    
053            protected abstract void addDocuments(
054                            String documentType, SearchContext searchContext,
055                            Collection<Document> documents)
056                    throws SearchException;
057    
058            protected void addNGramFields(
059                    Document document, Map<String, String> nGrams) {
060    
061                    for (Map.Entry<String, String> nGramEntry : nGrams.entrySet()) {
062                            document.addKeyword(nGramEntry.getKey(), nGramEntry.getValue());
063                    }
064            }
065    
066            protected Document createDocument() {
067                    return (Document)_documentPrototype.clone();
068            }
069    
070            protected Document createDocument(
071                            long companyId, long groupId, String languageId, String keywords,
072                            float weight, String keywordFieldName, String typeFieldValue,
073                            int maxNGramLength)
074                    throws SearchException {
075    
076                    Document document = createDocument();
077    
078                    document.addKeyword(Field.COMPANY_ID, companyId);
079                    document.addKeyword(Field.GROUP_ID, groupId);
080                    document.addKeyword(Field.LANGUAGE_ID, languageId);
081                    document.addKeyword(Field.PRIORITY, String.valueOf(weight));
082                    document.addKeyword(Field.SPELL_CHECK_WORD, true);
083                    document.addKeyword(keywordFieldName, keywords);
084                    document.addKeyword(Field.TYPE, typeFieldValue);
085                    document.addKeyword(Field.UID, getUID(companyId, languageId, keywords));
086    
087                    NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
088                            keywords, maxNGramLength);
089    
090                    addNGramFields(document, nGramHolder.getNGramEnds());
091    
092                    Map<String, List<String>> nGrams = nGramHolder.getNGrams();
093    
094                    for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
095                            String fieldName = entry.getKey();
096    
097                            for (String nGram : entry.getValue()) {
098                                    document.addKeyword(fieldName, nGram);
099                            }
100                    }
101    
102                    addNGramFields(document, nGramHolder.getNGramStarts());
103    
104                    return document;
105            }
106    
107            @Override
108            protected void indexKeyword(
109                            SearchContext searchContext, long groupId, String languageId,
110                            String keyword, float weight, String keywordFieldName,
111                            String typeFieldValue, int maxNGramLength)
112                    throws Exception {
113    
114                    Document document = createDocument(
115                            searchContext.getCompanyId(), groupId, languageId, keyword, weight,
116                            keywordFieldName, typeFieldValue, maxNGramLength);
117    
118                    addDocument(typeFieldValue, searchContext, document);
119            }
120    
121            @Override
122            protected void indexKeywords(
123                            SearchContext searchContext, long groupId, String languageId,
124                            InputStream inputStream, String keywordFieldName,
125                            String typeFieldValue, int maxNGramLength)
126                    throws Exception {
127    
128                    Set<Document> documents = new HashSet<>();
129    
130                    try {
131                            DictionaryReader dictionaryReader = new DictionaryReader(
132                                    inputStream, StringPool.UTF8);
133    
134                            Iterator<DictionaryEntry> iterator =
135                                    dictionaryReader.getDictionaryEntriesIterator();
136    
137                            int counter = 0;
138    
139                            while (iterator.hasNext()) {
140                                    counter++;
141    
142                                    DictionaryEntry dictionaryEntry = iterator.next();
143    
144                                    Document document = createDocument(
145                                            searchContext.getCompanyId(), groupId, languageId,
146                                            dictionaryEntry.getWord(), dictionaryEntry.getWeight(),
147                                            keywordFieldName, typeFieldValue, maxNGramLength);
148    
149                                    documents.add(document);
150    
151                                    if ((counter == _batchSize) || !iterator.hasNext()) {
152                                            addDocuments(typeFieldValue, searchContext, documents);
153    
154                                            documents.clear();
155    
156                                            counter = 0;
157                                    }
158                            }
159                    }
160                    catch (Exception e) {
161                            if (_log.isWarnEnabled()) {
162                                    _log.warn("Unable to index dictionaries", e);
163                            }
164    
165                            throw new SearchException(e.getMessage(), e);
166                    }
167            }
168    
169            private static final int _DEFAULT_BATCH_SIZE = 1000;
170    
171            private static final Log _log = LogFactoryUtil.getLog(
172                    BaseGenericSpellCheckIndexWriter.class);
173    
174            private int _batchSize = _DEFAULT_BATCH_SIZE;
175            private Document _documentPrototype = new DocumentImpl();
176    
177    }