001    /**
002     * Copyright (c) 2000-present Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.kernel.search;
016    
017    import com.liferay.portal.kernel.log.Log;
018    import com.liferay.portal.kernel.log.LogFactoryUtil;
019    import com.liferay.portal.kernel.util.StringPool;
020    import com.liferay.portal.util.PortletKeys;
021    
022    import java.io.InputStream;
023    
024    import java.util.Collection;
025    import java.util.HashSet;
026    import java.util.Iterator;
027    import java.util.List;
028    import java.util.Map;
029    import java.util.Set;
030    
031    /**
032     * @author Michael C. Han
033     */
034    public abstract class BaseGenericSpellCheckIndexWriter
035            extends BaseSpellCheckIndexWriter {
036    
037            public void setBatchSize(int batchSize) {
038                    _batchSize = batchSize;
039            }
040    
041            public void setDocumentPrototype(Document documentPrototype) {
042                    _documentPrototype = documentPrototype;
043            }
044    
045            protected abstract void addDocument(
046                            String documentType, SearchContext searchContext, Document document)
047                    throws SearchException;
048    
049            protected abstract void addDocuments(
050                            String documentType, SearchContext searchContext,
051                            Collection<Document> documents)
052                    throws SearchException;
053    
054            protected void addNGramFields(
055                    Document document, Map<String, String> nGrams) {
056    
057                    for (Map.Entry<String, String> nGramEntry : nGrams.entrySet()) {
058                            document.addKeyword(nGramEntry.getKey(), nGramEntry.getValue());
059                    }
060            }
061    
062            protected Document createDocument() {
063                    return (Document)_documentPrototype.clone();
064            }
065    
066            protected Document createDocument(
067                            long companyId, long groupId, String languageId, String keywords,
068                            float weight, String keywordFieldName, String typeFieldValue,
069                            int maxNGramLength)
070                    throws SearchException {
071    
072                    Document document = createDocument();
073    
074                    document.addKeyword(Field.COMPANY_ID, companyId);
075                    document.addKeyword(Field.GROUP_ID, groupId);
076                    document.addKeyword(Field.LANGUAGE_ID, languageId);
077                    document.addKeyword(Field.PORTLET_ID, PortletKeys.SEARCH);
078                    document.addKeyword(Field.PRIORITY, String.valueOf(weight));
079                    document.addKeyword(keywordFieldName, keywords);
080                    document.addKeyword(Field.TYPE, typeFieldValue);
081                    document.addKeyword(Field.UID, getUID(companyId, languageId, keywords));
082    
083                    NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
084                            keywords, maxNGramLength);
085    
086                    addNGramFields(document, nGramHolder.getNGramEnds());
087    
088                    Map<String, List<String>> nGrams = nGramHolder.getNGrams();
089    
090                    for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
091                            String fieldName = entry.getKey();
092    
093                            for (String nGram : entry.getValue()) {
094                                    document.addKeyword(fieldName, nGram);
095                            }
096                    }
097    
098                    addNGramFields(document, nGramHolder.getNGramStarts());
099    
100                    return document;
101            }
102    
103            @Override
104            protected void indexKeyword(
105                            SearchContext searchContext, long groupId, String languageId,
106                            String keyword, float weight, String keywordFieldName,
107                            String typeFieldValue, int maxNGramLength)
108                    throws Exception {
109    
110                    Document document = createDocument(
111                            searchContext.getCompanyId(), groupId, languageId, keyword, weight,
112                            keywordFieldName, typeFieldValue, maxNGramLength);
113    
114                    addDocument(typeFieldValue, searchContext, document);
115            }
116    
117            @Override
118            protected void indexKeywords(
119                            SearchContext searchContext, long groupId, String languageId,
120                            InputStream inputStream, String keywordFieldName,
121                            String typeFieldValue, int maxNGramLength)
122                    throws Exception {
123    
124                    Set<Document> documents = new HashSet<Document>();
125    
126                    try {
127                            DictionaryReader dictionaryReader = new DictionaryReader(
128                                    inputStream, StringPool.UTF8);
129    
130                            Iterator<DictionaryEntry> iterator =
131                                    dictionaryReader.getDictionaryEntriesIterator();
132    
133                            int counter = 0;
134    
135                            while (iterator.hasNext()) {
136                                    counter++;
137    
138                                    DictionaryEntry dictionaryEntry = iterator.next();
139    
140                                    Document document = createDocument(
141                                            searchContext.getCompanyId(), groupId, languageId,
142                                            dictionaryEntry.getWord(), dictionaryEntry.getWeight(),
143                                            keywordFieldName, typeFieldValue, maxNGramLength);
144    
145                                    documents.add(document);
146    
147                                    if ((counter == _batchSize) || !iterator.hasNext()) {
148                                            addDocuments(typeFieldValue, searchContext, documents);
149    
150                                            documents.clear();
151    
152                                            counter = 0;
153                                    }
154                            }
155                    }
156                    catch (Exception e) {
157                            if (_log.isWarnEnabled()) {
158                                    _log.warn("Unable to index dictionaries", e);
159                            }
160    
161                            throw new SearchException(e.getMessage(), e);
162                    }
163            }
164    
165            private static final int _DEFAULT_BATCH_SIZE = 1000;
166    
167            private static Log _log = LogFactoryUtil.getLog(
168                    BaseGenericSpellCheckIndexWriter.class);
169    
170            private int _batchSize = _DEFAULT_BATCH_SIZE;
171            private Document _documentPrototype = new DocumentImpl();
172    
173    }