001    /**
002     * Copyright (c) 2000-present Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.kernel.search.suggest;
016    
017    import com.liferay.portal.kernel.configuration.Filter;
018    import com.liferay.portal.kernel.log.Log;
019    import com.liferay.portal.kernel.log.LogFactoryUtil;
020    import com.liferay.portal.kernel.nio.charset.CharsetEncoderUtil;
021    import com.liferay.portal.kernel.search.Field;
022    import com.liferay.portal.kernel.search.SearchContext;
023    import com.liferay.portal.kernel.search.SearchException;
024    import com.liferay.portal.kernel.util.ArrayUtil;
025    import com.liferay.portal.kernel.util.Base64;
026    import com.liferay.portal.kernel.util.Digester;
027    import com.liferay.portal.kernel.util.DigesterUtil;
028    import com.liferay.portal.kernel.util.PortalClassLoaderUtil;
029    import com.liferay.portal.kernel.util.PropsKeys;
030    import com.liferay.portal.kernel.util.PropsUtil;
031    import com.liferay.portal.kernel.util.StreamUtil;
032    import com.liferay.portal.kernel.util.StringBundler;
033    import com.liferay.portal.kernel.util.StringPool;
034    import com.liferay.portal.kernel.util.StringUtil;
035    import com.liferay.portal.model.Group;
036    import com.liferay.portal.service.GroupLocalServiceUtil;
037    
038    import java.io.InputStream;
039    
040    import java.net.URL;
041    
042    import java.nio.CharBuffer;
043    import java.nio.charset.CharsetEncoder;
044    
045    import java.util.List;
046    
047    /**
048     * @author Michael C. Han
049     */
050    public abstract class BaseSpellCheckIndexWriter
051            implements SpellCheckIndexWriter {
052    
053            @Override
054            public void indexKeyword(
055                            SearchContext searchContext, float weight, String keywordType)
056                    throws SearchException {
057    
058                    if (!keywordType.equals(SuggestionConstants.TYPE_QUERY_SUGGESTION) &&
059                            !keywordType.equals(SuggestionConstants.TYPE_SPELL_CHECKER)) {
060    
061                            throw new IllegalArgumentException(
062                                    "Invalid keyword type " + keywordType);
063                    }
064    
065                    long groupId = 0;
066    
067                    long[] groupIds = searchContext.getGroupIds();
068    
069                    if ((groupIds != null) && (groupIds.length > 0)) {
070                            groupId = groupIds[1];
071                    }
072    
073                    String keywordFieldName = Field.KEYWORD_SEARCH;
074                    String typeFieldValue = SuggestionConstants.TYPE_QUERY_SUGGESTION;
075                    int maxNGramLength = _querySuggestionMaxNGramLength;
076    
077                    if (keywordType.equals(SuggestionConstants.TYPE_SPELL_CHECKER)) {
078                            keywordFieldName = Field.SPELL_CHECK_WORD;
079                            typeFieldValue = SuggestionConstants.TYPE_SPELL_CHECKER;
080                            maxNGramLength = 0;
081                    }
082    
083                    try {
084                            indexKeyword(
085                                    searchContext, groupId, searchContext.getLanguageId(),
086                                    searchContext.getKeywords(), weight, keywordFieldName,
087                                    typeFieldValue, maxNGramLength);
088                    }
089                    catch (Exception e) {
090                            throw new SearchException(e);
091                    }
092            }
093    
094            @Override
095            public void indexQuerySuggestionDictionaries(SearchContext searchContext)
096                    throws SearchException {
097    
098                    try {
099                            for (String languageId : _SUPPORTED_LOCALES) {
100                                    indexKeywords(
101                                            searchContext, languageId,
102                                            PropsKeys.INDEX_SEARCH_QUERY_SUGGESTION_DICTIONARY,
103                                            Field.KEYWORD_SEARCH,
104                                            SuggestionConstants.TYPE_QUERY_SUGGESTION,
105                                            _querySuggestionMaxNGramLength);
106                            }
107                    }
108                    catch (Exception e) {
109                            throw new SearchException(e);
110                    }
111            }
112    
113            @Override
114            public void indexQuerySuggestionDictionary(SearchContext searchContext)
115                    throws SearchException {
116    
117                    try {
118                            indexKeywords(
119                                    searchContext, searchContext.getLanguageId(),
120                                    PropsKeys.INDEX_SEARCH_QUERY_SUGGESTION_DICTIONARY,
121                                    Field.KEYWORD_SEARCH, SuggestionConstants.TYPE_QUERY_SUGGESTION,
122                                    _querySuggestionMaxNGramLength);
123                    }
124                    catch (Exception e) {
125                            throw new SearchException(e);
126                    }
127            }
128    
129            @Override
130            public void indexSpellCheckerDictionaries(SearchContext searchContext)
131                    throws SearchException {
132    
133                    try {
134                            for (String languageId : _SUPPORTED_LOCALES) {
135                                    indexKeywords(
136                                            searchContext, languageId,
137                                            PropsKeys.INDEX_SEARCH_SPELL_CHECKER_DICTIONARY,
138                                            Field.SPELL_CHECK_WORD,
139                                            SuggestionConstants.TYPE_SPELL_CHECKER, 0);
140                            }
141                    }
142                    catch (Exception e) {
143                            throw new SearchException(e);
144                    }
145            }
146    
147            @Override
148            public void indexSpellCheckerDictionary(SearchContext searchContext)
149                    throws SearchException {
150    
151                    try {
152                            indexKeywords(
153                                    searchContext, searchContext.getLanguageId(),
154                                    PropsKeys.INDEX_SEARCH_SPELL_CHECKER_DICTIONARY,
155                                    Field.SPELL_CHECK_WORD, SuggestionConstants.TYPE_SPELL_CHECKER,
156                                    0);
157                    }
158                    catch (Exception e) {
159                            throw new SearchException(e);
160                    }
161            }
162    
163            public void setQuerySuggestionMaxNGramLength(
164                    int querySuggestionMaxNGramLength) {
165    
166                    _querySuggestionMaxNGramLength = querySuggestionMaxNGramLength;
167            }
168    
169            protected URL getResource(String name) {
170                    ClassLoader contextClassLoader =
171                            Thread.currentThread().getContextClassLoader();
172    
173                    URL url = contextClassLoader.getResource(name);
174    
175                    if (url == null) {
176                            ClassLoader portalClassLoader =
177                                    PortalClassLoaderUtil.getClassLoader();
178    
179                            url = portalClassLoader.getResource(name);
180                    }
181    
182                    return url;
183            }
184    
185            protected String getUID(
186                    long companyId, String languageId, String word, String... parameters) {
187    
188                    StringBundler uidSB = new StringBundler(5);
189    
190                    uidSB.append(String.valueOf(companyId));
191                    uidSB.append(StringPool.UNDERLINE);
192                    uidSB.append(Field.SPELL_CHECK_WORD);
193                    uidSB.append(StringPool.UNDERLINE);
194    
195                    int length = 5;
196    
197                    if (parameters != null) {
198                            length += 2 * parameters.length;
199                    }
200    
201                    try {
202                            CharsetEncoder charsetEncoder =
203                                    CharsetEncoderUtil.getCharsetEncoder(StringPool.UTF8);
204    
205                            StringBundler keySB = new StringBundler(length);
206    
207                            keySB.append(languageId);
208                            keySB.append(StringPool.UNDERLINE);
209                            keySB.append(word);
210                            keySB.append(StringPool.UNDERLINE);
211    
212                            keySB.append(StringUtil.toLowerCase(word));
213    
214                            if (parameters != null) {
215                                    for (String parameter : parameters) {
216                                            keySB.append(parameter);
217                                            keySB.append(StringPool.UNDERLINE);
218                                    }
219                            }
220    
221                            String key = keySB.toString();
222    
223                            byte[] bytes = DigesterUtil.digestRaw(
224                                    Digester.MD5, charsetEncoder.encode(CharBuffer.wrap(key)));
225    
226                            uidSB.append(Base64.encode(bytes));
227                    }
228                    catch (Exception e) {
229                            throw new IllegalStateException(e);
230                    }
231    
232                    return uidSB.toString();
233            }
234    
235            protected abstract void indexKeyword(
236                            SearchContext searchContext, long groupId, String languageId,
237                            String keyword, float weight, String keywordFieldName,
238                            String typeFieldValue, int maxNGramLength)
239                    throws Exception;
240    
241            protected abstract void indexKeywords(
242                            SearchContext searchContext, long groupId, String languageId,
243                            InputStream inputStream, String keywordFieldName,
244                            String typeFieldValue, int maxNGramLength)
245                    throws Exception;
246    
247            protected void indexKeywords(
248                            SearchContext searchContext, long groupId, String languageId,
249                            String[] dictionaryFileNames, String keywordFieldName,
250                            String typeFieldValue, int maxNGramLength)
251                    throws Exception {
252    
253                    for (String dictionaryFileName : dictionaryFileNames) {
254                            InputStream inputStream = null;
255    
256                            if (_log.isInfoEnabled()) {
257                                    _log.info(
258                                            "Start indexing dictionary for " + dictionaryFileName);
259                            }
260    
261                            try {
262                                    URL url = getResource(dictionaryFileName);
263    
264                                    if (url == null) {
265                                            if (_log.isWarnEnabled()) {
266                                                    _log.warn("Unable to read " + dictionaryFileName);
267                                            }
268    
269                                            continue;
270                                    }
271    
272                                    inputStream = url.openStream();
273    
274                                    if (inputStream == null) {
275                                            if (_log.isWarnEnabled()) {
276                                                    _log.warn("Unable to read " + dictionaryFileName);
277                                            }
278    
279                                            continue;
280                                    }
281    
282                                    indexKeywords(
283                                            searchContext, groupId, languageId, inputStream,
284                                            keywordFieldName, typeFieldValue, maxNGramLength);
285                            }
286                            finally {
287                                    StreamUtil.cleanUp(inputStream);
288                            }
289    
290                            if (_log.isInfoEnabled()) {
291                                    _log.info(
292                                            "Finished indexing dictionary for " + dictionaryFileName);
293                            }
294                    }
295            }
296    
297            protected void indexKeywords(
298                            SearchContext searchContext, String languageId, String propsKey,
299                            String keywordFieldName, String typeFieldValue, int maxNGramLength)
300                    throws Exception {
301    
302                    String[] dictionaryFileNames = PropsUtil.getArray(
303                            propsKey, new Filter(languageId));
304    
305                    indexKeywords(
306                            searchContext, 0, languageId, dictionaryFileNames, keywordFieldName,
307                            typeFieldValue, maxNGramLength);
308    
309                    List<Group> groups = GroupLocalServiceUtil.getLiveGroups();
310    
311                    for (Group group : groups) {
312                            String[] groupDictionaryFileNames = PropsUtil.getArray(
313                                    propsKey,
314                                    new Filter(languageId, String.valueOf(group.getGroupId())));
315    
316                            if (ArrayUtil.isEmpty(groupDictionaryFileNames)) {
317                                    continue;
318                            }
319    
320                            indexKeywords(
321                                    searchContext, group.getGroupId(), languageId,
322                                    groupDictionaryFileNames, keywordFieldName, typeFieldValue,
323                                    maxNGramLength);
324                    }
325            }
326    
327            private static final String[] _SUPPORTED_LOCALES = StringUtil.split(
328                    PropsUtil.get(PropsKeys.INDEX_SEARCH_SPELL_CHECKER_SUPPORTED_LOCALES));
329    
330            private static final Log _log = LogFactoryUtil.getLog(
331                    BaseSpellCheckIndexWriter.class);
332    
333            private int _querySuggestionMaxNGramLength = 50;
334    
335    }