001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.kernel.search;
016    
017    import com.liferay.portal.kernel.log.Log;
018    import com.liferay.portal.kernel.log.LogFactoryUtil;
019    import com.liferay.portal.kernel.util.StringPool;
020    import com.liferay.portal.util.PortletKeys;
021    
022    import java.io.InputStream;
023    
024    import java.util.HashSet;
025    import java.util.Iterator;
026    import java.util.List;
027    import java.util.Map;
028    import java.util.Set;
029    
030    /**
031     * @author Michael C. Han
032     */
033    public abstract class BaseGenericSpellCheckIndexWriter
034            extends BaseSpellCheckIndexWriter {
035    
036            public void setBatchSize(int batchSize) {
037                    _batchSize = batchSize;
038            }
039    
040            public void setDocumentPrototype(Document documentPrototype) {
041                    _documentPrototype = documentPrototype;
042            }
043    
044            public void setIndexWriter(IndexWriter indexWriter) {
045                    _indexWriter = indexWriter;
046            }
047    
048            protected void addNGramFields(
049                    Document document, Map<String, String> nGrams) {
050    
051                    for (Map.Entry<String, String> nGramEntry : nGrams.entrySet()) {
052                            document.addKeyword(nGramEntry.getKey(), nGramEntry.getValue());
053                    }
054            }
055    
056            protected Document createDocument(
057                            long companyId, long groupId, String languageId, String keywords,
058                            float weight, String keywordFieldName, String typeFieldValue,
059                            int maxNGramLength)
060                    throws SearchException {
061    
062                    Document document = (Document)_documentPrototype.clone();
063    
064                    document.addKeyword(Field.COMPANY_ID, companyId);
065                    document.addKeyword(Field.GROUP_ID, groupId);
066                    document.addKeyword(Field.LANGUAGE_ID, languageId);
067                    document.addKeyword(Field.PORTLET_ID, PortletKeys.SEARCH);
068                    document.addKeyword(Field.PRIORITY, String.valueOf(weight));
069                    document.addKeyword(keywordFieldName, keywords);
070                    document.addKeyword(Field.TYPE, typeFieldValue);
071                    document.addKeyword(Field.UID, getUID(companyId, languageId, keywords));
072    
073                    NGramHolder nGramHolder = NGramHolderBuilderUtil.buildNGramHolder(
074                            keywords, maxNGramLength);
075    
076                    addNGramFields(document, nGramHolder.getNGramEnds());
077    
078                    Map<String, List<String>> nGrams = nGramHolder.getNGrams();
079    
080                    for (Map.Entry<String, List<String>> entry : nGrams.entrySet()) {
081                            String fieldName = entry.getKey();
082    
083                            for (String nGram : entry.getValue()) {
084                                    document.addKeyword(fieldName, nGram);
085                            }
086                    }
087    
088                    addNGramFields(document, nGramHolder.getNGramStarts());
089    
090                    return document;
091            }
092    
093            @Override
094            protected void indexKeywords(
095                            long companyId, long groupId, String languageId,
096                            InputStream inputStream, String keywordFieldName,
097                            String typeFieldValue, int maxNGramLength)
098                    throws Exception {
099    
100                    Set<Document> documents = new HashSet<Document>();
101    
102                    try {
103                            DictionaryReader dictionaryReader = new DictionaryReader(
104                                    inputStream, StringPool.UTF8);
105    
106                            Iterator<DictionaryEntry> iterator =
107                                    dictionaryReader.getDictionaryEntriesIterator();
108    
109                            int counter = 0;
110    
111                            while (iterator.hasNext()) {
112                                    counter++;
113    
114                                    DictionaryEntry dictionaryEntry = iterator.next();
115    
116                                    Document document = createDocument(
117                                            companyId, groupId, languageId, dictionaryEntry.getWord(),
118                                            dictionaryEntry.getWeight(), keywordFieldName,
119                                            typeFieldValue, maxNGramLength);
120    
121                                    documents.add(document);
122    
123                                    if ((counter == _batchSize) || !iterator.hasNext()) {
124                                            _indexWriter.addDocuments(null, documents);
125    
126                                            documents.clear();
127    
128                                            counter = 0;
129                                    }
130                            }
131                    }
132                    catch (Exception e) {
133                            if (_log.isWarnEnabled()) {
134                                    _log.warn("Unable to index dictionaries", e);
135                            }
136    
137                            throw new SearchException(e.getMessage(), e);
138                    }
139            }
140    
141            private static final int _DEFAULT_BATCH_SIZE = 1000;
142    
143            private static Log _log = LogFactoryUtil.getLog(
144                    BaseGenericSpellCheckIndexWriter.class);
145    
146            private int _batchSize = _DEFAULT_BATCH_SIZE;
147            private Document _documentPrototype = new DocumentImpl();
148            private IndexWriter _indexWriter;
149    
150    }