001    /**
002     * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import com.liferay.portal.kernel.io.unsync.UnsyncStringReader;
018    import com.liferay.portal.kernel.log.Log;
019    import com.liferay.portal.kernel.log.LogFactoryUtil;
020    import com.liferay.portal.kernel.search.Field;
021    import com.liferay.portal.kernel.util.PropsKeys;
022    import com.liferay.portal.kernel.util.StringPool;
023    import com.liferay.portal.kernel.util.StringUtil;
024    import com.liferay.portal.kernel.util.Validator;
025    import com.liferay.portal.util.PropsUtil;
026    import com.liferay.util.lucene.KeywordsUtil;
027    
028    import java.io.IOException;
029    
030    import java.util.HashSet;
031    import java.util.Map;
032    import java.util.Set;
033    import java.util.concurrent.ConcurrentHashMap;
034    
035    import org.apache.lucene.analysis.Analyzer;
036    import org.apache.lucene.analysis.TokenStream;
037    import org.apache.lucene.analysis.WhitespaceAnalyzer;
038    import org.apache.lucene.document.Document;
039    import org.apache.lucene.index.Term;
040    import org.apache.lucene.queryParser.ParseException;
041    import org.apache.lucene.queryParser.QueryParser;
042    import org.apache.lucene.search.BooleanClause;
043    import org.apache.lucene.search.BooleanQuery;
044    import org.apache.lucene.search.IndexSearcher;
045    import org.apache.lucene.search.Query;
046    import org.apache.lucene.search.TermQuery;
047    import org.apache.lucene.search.WildcardQuery;
048    import org.apache.lucene.search.highlight.Highlighter;
049    import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
050    import org.apache.lucene.search.highlight.QueryScorer;
051    import org.apache.lucene.search.highlight.QueryTermExtractor;
052    import org.apache.lucene.search.highlight.SimpleFragmenter;
053    import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
054    import org.apache.lucene.search.highlight.WeightedTerm;
055    
056    /**
057     * @author Brian Wing Shun Chan
058     * @author Harry Mark
059     * @author Bruno Farache
060     */
061    public class LuceneHelperImpl implements LuceneHelper {
062    
063            public void addDocument(long companyId, Document document)
064                    throws IOException {
065    
066                    IndexAccessor indexAccessor = _getIndexAccessor(companyId);
067    
068                    indexAccessor.addDocument(document);
069            }
070    
071            public void addExactTerm(
072                    BooleanQuery booleanQuery, String field, String value) {
073    
074                    //text = KeywordsUtil.escape(value);
075    
076                    Query query = new TermQuery(new Term(field, value));
077    
078                    booleanQuery.add(query, BooleanClause.Occur.SHOULD);
079            }
080    
081            public void addRequiredTerm(
082                    BooleanQuery booleanQuery, String field, String value, boolean like) {
083    
084                    if (like) {
085                            value = StringUtil.replace(
086                                    value, StringPool.PERCENT, StringPool.STAR);
087    
088                            value = value.toLowerCase();
089    
090                            WildcardQuery wildcardQuery = new WildcardQuery(
091                                    new Term(field, value));
092    
093                            booleanQuery.add(wildcardQuery, BooleanClause.Occur.MUST);
094                    }
095                    else {
096                            //text = KeywordsUtil.escape(value);
097    
098                            Term term = new Term(field, value);
099                            TermQuery termQuery = new TermQuery(term);
100    
101                            booleanQuery.add(termQuery, BooleanClause.Occur.MUST);
102                    }
103            }
104    
105            public void addTerm(
106                            BooleanQuery booleanQuery, String field, String value, boolean like)
107                    throws ParseException {
108    
109                    if (Validator.isNull(value)) {
110                            return;
111                    }
112    
113                    if (like) {
114                            value = StringUtil.replace(
115                                    value, StringPool.PERCENT, StringPool.BLANK);
116    
117                            value = value.toLowerCase();
118    
119                            Term term = new Term(
120                                    field, StringPool.STAR.concat(value).concat(StringPool.STAR));
121    
122                            WildcardQuery wildcardQuery = new WildcardQuery(term);
123    
124                            booleanQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD);
125                    }
126                    else {
127                            QueryParser queryParser = new QueryParser(field, getAnalyzer());
128    
129                            try {
130                                    Query query = queryParser.parse(value);
131    
132                                    booleanQuery.add(query, BooleanClause.Occur.SHOULD);
133                            }
134                            catch (ParseException pe) {
135                                    if (_log.isDebugEnabled()) {
136                                            _log.debug(
137                                                    "ParseException thrown, reverting to literal search",
138                                                    pe);
139                                    }
140    
141                                    value = KeywordsUtil.escape(value);
142    
143                                    Query query = queryParser.parse(value);
144    
145                                    booleanQuery.add(query, BooleanClause.Occur.SHOULD);
146                            }
147                    }
148            }
149    
150            public void delete(long companyId) {
151                    IndexAccessor indexAccessor = _getIndexAccessor(companyId);
152    
153                    indexAccessor.delete();
154            }
155    
156            public void deleteDocuments(long companyId, Term term) throws IOException {
157                    IndexAccessor indexAccessor = _getIndexAccessor(companyId);
158    
159                    indexAccessor.deleteDocuments(term);
160            }
161    
162            public Analyzer getAnalyzer() {
163                    try {
164                            return (Analyzer)_analyzerClass.newInstance();
165                    }
166                    catch (Exception e) {
167                            throw new RuntimeException(e);
168                    }
169            }
170    
171            public String[] getQueryTerms(Query query) {
172                    String[] fieldNames = new String[] {
173                            Field.CONTENT, Field.DESCRIPTION, Field.PROPERTIES, Field.TITLE,
174                            Field.USER_NAME
175                    };
176    
177                    WeightedTerm[] weightedTerms = null;
178    
179                    for (String fieldName : fieldNames) {
180                            weightedTerms = QueryTermExtractor.getTerms(
181                                    query, false, fieldName);
182    
183                            if (weightedTerms.length > 0) {
184                                    break;
185                            }
186                    }
187    
188                    Set<String> queryTerms = new HashSet<String>();
189    
190                    for (WeightedTerm weightedTerm : weightedTerms) {
191                            queryTerms.add(weightedTerm.getTerm());
192                    }
193    
194                    return queryTerms.toArray(new String[queryTerms.size()]);
195            }
196    
197            public IndexSearcher getSearcher(long companyId, boolean readOnly)
198                    throws IOException {
199    
200                    IndexAccessor indexAccessor = _getIndexAccessor(companyId);
201    
202                    return new IndexSearcher(indexAccessor.getLuceneDir(), readOnly);
203            }
204    
205            public String getSnippet(
206                            Query query, String field, String s, int maxNumFragments,
207                            int fragmentLength, String fragmentSuffix, String preTag,
208                            String postTag)
209                    throws IOException {
210    
211                    SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
212                            preTag, postTag);
213    
214                    QueryScorer queryScorer = new QueryScorer(query, field);
215    
216                    Highlighter highlighter = new Highlighter(
217                            simpleHTMLFormatter, queryScorer);
218    
219                    highlighter.setTextFragmenter(new SimpleFragmenter(fragmentLength));
220    
221                    TokenStream tokenStream = getAnalyzer().tokenStream(
222                            field, new UnsyncStringReader(s));
223    
224                    try {
225                            String snippet = highlighter.getBestFragments(
226                                    tokenStream, s, maxNumFragments, fragmentSuffix);
227    
228                            if (Validator.isNotNull(snippet) &&
229                                    !StringUtil.endsWith(snippet, fragmentSuffix)) {
230    
231                                    snippet = snippet + fragmentSuffix;
232                            }
233    
234                            return snippet;
235                    }
236                    catch (InvalidTokenOffsetsException itoe) {
237                            throw new IOException(itoe.getMessage());
238                    }
239            }
240    
241            public void updateDocument(long companyId, Term term, Document document)
242                    throws IOException {
243    
244                    IndexAccessor indexAccessor = _getIndexAccessor(companyId);
245    
246                    indexAccessor.updateDocument(term, document);
247            }
248    
249            public void shutdown() {
250                    for (IndexAccessor indexAccessor : _indexAccessorMap.values()) {
251                            indexAccessor.close();
252                    }
253            }
254    
255            private LuceneHelperImpl() {
256                    String analyzerName = PropsUtil.get(PropsKeys.LUCENE_ANALYZER);
257    
258                    if (Validator.isNotNull(analyzerName)) {
259                            try {
260                                    _analyzerClass = Class.forName(analyzerName);
261                            }
262                            catch (Exception e) {
263                                    _log.error(e);
264                            }
265                    }
266            }
267    
268            private IndexAccessor _getIndexAccessor(long companyId) {
269                    IndexAccessor indexAccessor = _indexAccessorMap.get(companyId);
270    
271                    if (indexAccessor == null) {
272                            synchronized (this) {
273                                    indexAccessor = _indexAccessorMap.get(companyId);
274    
275                                    if (indexAccessor == null) {
276                                            indexAccessor = new IndexAccessorImpl(companyId);
277    
278                                            _indexAccessorMap.put(companyId, indexAccessor);
279                                    }
280                            }
281                    }
282    
283                    return indexAccessor;
284            }
285    
286            private static Log _log = LogFactoryUtil.getLog(LuceneHelperImpl.class);
287    
288            private Class<?> _analyzerClass = WhitespaceAnalyzer.class;
289            private Map<Long, IndexAccessor> _indexAccessorMap =
290                    new ConcurrentHashMap<Long, IndexAccessor>();
291    
292    }