001    /**
002     * Copyright (c) 2000-present Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import com.browseengine.bobo.api.BoboBrowser;
018    import com.browseengine.bobo.api.BoboIndexReader;
019    import com.browseengine.bobo.api.BoboSubBrowser;
020    import com.browseengine.bobo.api.Browsable;
021    import com.browseengine.bobo.api.BrowseHit;
022    import com.browseengine.bobo.api.BrowseRequest;
023    import com.browseengine.bobo.api.BrowseResult;
024    import com.browseengine.bobo.api.FacetAccessible;
025    import com.browseengine.bobo.api.FacetSpec;
026    import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
027    import com.browseengine.bobo.facets.FacetHandler;
028    import com.browseengine.bobo.facets.FacetHandler.TermCountSize;
029    import com.browseengine.bobo.facets.impl.MultiValueFacetHandler;
030    import com.browseengine.bobo.facets.impl.RangeFacetHandler;
031    import com.browseengine.bobo.facets.impl.SimpleFacetHandler;
032    
033    import com.liferay.portal.kernel.dao.orm.QueryUtil;
034    import com.liferay.portal.kernel.dao.search.SearchPaginationUtil;
035    import com.liferay.portal.kernel.json.JSONArray;
036    import com.liferay.portal.kernel.json.JSONObject;
037    import com.liferay.portal.kernel.log.Log;
038    import com.liferay.portal.kernel.log.LogFactoryUtil;
039    import com.liferay.portal.kernel.search.BaseIndexSearcher;
040    import com.liferay.portal.kernel.search.Document;
041    import com.liferay.portal.kernel.search.DocumentImpl;
042    import com.liferay.portal.kernel.search.Field;
043    import com.liferay.portal.kernel.search.Hits;
044    import com.liferay.portal.kernel.search.HitsImpl;
045    import com.liferay.portal.kernel.search.ParseException;
046    import com.liferay.portal.kernel.search.Query;
047    import com.liferay.portal.kernel.search.QueryConfig;
048    import com.liferay.portal.kernel.search.QueryTranslatorUtil;
049    import com.liferay.portal.kernel.search.SearchContext;
050    import com.liferay.portal.kernel.search.SearchException;
051    import com.liferay.portal.kernel.search.Sort;
052    import com.liferay.portal.kernel.search.facet.Facet;
053    import com.liferay.portal.kernel.search.facet.MultiValueFacet;
054    import com.liferay.portal.kernel.search.facet.RangeFacet;
055    import com.liferay.portal.kernel.search.facet.SimpleFacet;
056    import com.liferay.portal.kernel.search.facet.collector.FacetCollector;
057    import com.liferay.portal.kernel.search.facet.config.FacetConfiguration;
058    import com.liferay.portal.kernel.util.ArrayUtil;
059    import com.liferay.portal.kernel.util.ReflectionUtil;
060    import com.liferay.portal.kernel.util.SetUtil;
061    import com.liferay.portal.kernel.util.StringPool;
062    import com.liferay.portal.kernel.util.StringUtil;
063    import com.liferay.portal.kernel.util.Time;
064    import com.liferay.portal.kernel.util.Validator;
065    import com.liferay.portal.search.BoboFacetCollector;
066    import com.liferay.portal.util.PropsValues;
067    
068    import java.io.IOException;
069    
070    import java.util.ArrayList;
071    import java.util.Collections;
072    import java.util.HashSet;
073    import java.util.List;
074    import java.util.Locale;
075    import java.util.Map;
076    import java.util.Set;
077    
078    import org.apache.lucene.document.FieldSelector;
079    import org.apache.lucene.document.NumericField;
080    import org.apache.lucene.document.SetBasedFieldSelector;
081    import org.apache.lucene.index.IndexReader;
082    import org.apache.lucene.search.BooleanQuery;
083    import org.apache.lucene.search.Explanation;
084    import org.apache.lucene.search.IndexSearcher;
085    import org.apache.lucene.search.SortField;
086    import org.apache.lucene.search.highlight.Formatter;
087    import org.apache.lucene.search.highlight.TokenGroup;
088    
089    /**
090     * @author Bruno Farache
091     */
092    public class LuceneIndexSearcher extends BaseIndexSearcher {
093    
094            @Override
095            public Hits search(SearchContext searchContext, Query query)
096                    throws SearchException {
097    
098                    if (_log.isDebugEnabled()) {
099                            _log.debug("Query " + query);
100                    }
101    
102                    Hits hits = null;
103    
104                    IndexSearcher indexSearcher = null;
105                    Map<String, Facet> facets = null;
106                    BoboBrowser boboBrowser = null;
107                    BrowseRequest browseRequest = null;
108    
109                    try {
110                            indexSearcher = LuceneHelperUtil.getIndexSearcher(
111                                    searchContext.getCompanyId());
112    
113                            List<FacetHandler<?>> facetHandlers =
114                                    new ArrayList<FacetHandler<?>>();
115    
116                            facets = searchContext.getFacets();
117    
118                            for (Facet facet : facets.values()) {
119                                    if (facet.isStatic()) {
120                                            continue;
121                                    }
122    
123                                    FacetConfiguration facetConfiguration =
124                                            facet.getFacetConfiguration();
125    
126                                    if (facet instanceof MultiValueFacet) {
127                                            MultiValueFacetHandler multiValueFacetHandler =
128                                                    new MultiValueFacetHandler(
129                                                            facetConfiguration.getFieldName(),
130                                                            facetConfiguration.getFieldName());
131    
132                                            JSONObject dataJSONObject = facetConfiguration.getData();
133    
134                                            if (dataJSONObject.has("maxTerms")) {
135                                                    multiValueFacetHandler.setMaxItems(
136                                                            dataJSONObject.getInt("maxTerms"));
137                                            }
138    
139                                            facetHandlers.add(multiValueFacetHandler);
140                                    }
141                                    else if (facet instanceof RangeFacet) {
142                                            List<String> ranges = new ArrayList<String>();
143    
144                                            JSONObject dataJSONObject = facetConfiguration.getData();
145    
146                                            JSONArray rangesJSONArray = dataJSONObject.getJSONArray(
147                                                    "ranges");
148    
149                                            if (rangesJSONArray != null) {
150                                                    for (int i = 0; i < rangesJSONArray.length(); i++) {
151                                                            JSONObject rangeJSONObject =
152                                                                    rangesJSONArray.getJSONObject(i);
153    
154                                                            ranges.add(rangeJSONObject.getString("range"));
155                                                    }
156                                            }
157    
158                                            RangeFacetHandler rangeFacetHandler = new RangeFacetHandler(
159                                                    facetConfiguration.getFieldName(),
160                                                    facetConfiguration.getFieldName(), ranges);
161    
162                                            rangeFacetHandler.setTermCountSize(TermCountSize.large);
163    
164                                            facetHandlers.add(rangeFacetHandler);
165                                    }
166                                    else if (facet instanceof SimpleFacet) {
167                                            SimpleFacetHandler simpleFacetHandler =
168                                                    new SimpleFacetHandler(
169                                                            facetConfiguration.getFieldName(),
170                                                            facetConfiguration.getFieldName());
171    
172                                            facetHandlers.add(simpleFacetHandler);
173                                    }
174                            }
175    
176                            BoboIndexReader boboIndexReader = BoboIndexReader.getInstance(
177                                    indexSearcher.getIndexReader(), facetHandlers);
178    
179                            SortField[] sortFields = new SortField[0];
180    
181                            Sort[] sorts = searchContext.getSorts();
182    
183                            if (sorts != null) {
184                                    sortFields = new SortField[sorts.length];
185    
186                                    for (int i = 0; i < sorts.length; i++) {
187                                            Sort sort = sorts[i];
188    
189                                            if ((sort.getType() == Sort.STRING_TYPE) &&
190                                                    (searchContext.getLocale() != null)) {
191    
192                                                    sortFields[i] = new SortField(
193                                                            sort.getFieldName(), searchContext.getLocale(),
194                                                            sort.isReverse());
195                                            }
196                                            else {
197                                                    sortFields[i] = new SortField(
198                                                            sort.getFieldName(), sort.getType(),
199                                                            sort.isReverse());
200                                            }
201                                    }
202                            }
203    
204                            browseRequest = new BrowseRequest();
205    
206                            for (Facet facet : facets.values()) {
207                                    if (facet.isStatic()) {
208                                            continue;
209                                    }
210    
211                                    FacetConfiguration facetConfiguration =
212                                            facet.getFacetConfiguration();
213    
214                                    FacetSpec facetSpec = new FacetSpec();
215    
216                                    facetSpec.setOrderBy(
217                                            FacetSortSpec.valueOf(facetConfiguration.getOrder()));
218    
219                                    browseRequest.setFacetSpec(facet.getFieldName(), facetSpec);
220                            }
221    
222                            int end = searchContext.getEnd();
223    
224                            if ((end == QueryUtil.ALL_POS) ||
225                                    (end > PropsValues.INDEX_SEARCH_LIMIT)) {
226    
227                                    end = PropsValues.INDEX_SEARCH_LIMIT;
228                            }
229    
230                            browseRequest.setCount(end);
231                            browseRequest.setOffset(0);
232                            browseRequest.setQuery(
233                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
234                                            query));
235                            browseRequest.setSort(sortFields);
236    
237                            boboBrowser = new BoboBrowser(boboIndexReader);
238    
239                            long startTime = System.currentTimeMillis();
240    
241                            BrowseResult browseResult = boboBrowser.browse(browseRequest);
242    
243                            long endTime = System.currentTimeMillis();
244    
245                            float searchTime = (float)(endTime - startTime) / Time.SECOND;
246    
247                            hits = toHits(
248                                    indexSearcher, browseResult, query, startTime, searchTime,
249                                    searchContext.getStart(), searchContext.getEnd());
250    
251                            Map<String, FacetAccessible> facetMap = browseResult.getFacetMap();
252    
253                            for (Map.Entry<String, FacetAccessible> entry :
254                                            facetMap.entrySet()) {
255    
256                                    Facet facet = facets.get(entry.getKey());
257    
258                                    FacetAccessible facetAccessible = entry.getValue();
259    
260                                    FacetCollector facetCollector = new BoboFacetCollector(
261                                            entry.getKey(), facetAccessible);
262    
263                                    facet.setFacetCollector(facetCollector);
264                            }
265                    }
266                    catch (BooleanQuery.TooManyClauses tmc) {
267                            int maxClauseCount = BooleanQuery.getMaxClauseCount();
268    
269                            BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
270    
271                            try {
272                                    long startTime = System.currentTimeMillis();
273    
274                                    BrowseResult browseResult = boboBrowser.browse(browseRequest);
275    
276                                    long endTime = System.currentTimeMillis();
277    
278                                    float searchTime = (float)(endTime - startTime) / Time.SECOND;
279    
280                                    hits = toHits(
281                                            indexSearcher, browseResult, query, startTime, searchTime,
282                                            searchContext.getStart(), searchContext.getEnd());
283    
284                                    Map<String, FacetAccessible> facetMap =
285                                            browseResult.getFacetMap();
286    
287                                    for (Map.Entry<String, FacetAccessible> entry :
288                                                    facetMap.entrySet()) {
289    
290                                            Facet facet = facets.get(entry.getKey());
291    
292                                            FacetAccessible facetAccessible = entry.getValue();
293    
294                                            FacetCollector facetCollector = new BoboFacetCollector(
295                                                    entry.getKey(), facetAccessible);
296    
297                                            facet.setFacetCollector(facetCollector);
298                                    }
299                            }
300                            catch (Exception e) {
301                                    throw new SearchException(e);
302                            }
303                            finally {
304                                    BooleanQuery.setMaxClauseCount(maxClauseCount);
305                            }
306                    }
307                    catch (ParseException pe) {
308                            _log.error("Query " + query, pe);
309    
310                            return new HitsImpl();
311                    }
312                    catch (Exception e) {
313                            throw new SearchException(e);
314                    }
315                    finally {
316                            cleanUp(boboBrowser);
317    
318                            try {
319                                    LuceneHelperUtil.releaseIndexSearcher(
320                                            searchContext.getCompanyId(), indexSearcher);
321                            }
322                            catch (IOException ioe) {
323                                    _log.error("Unable to release searcher", ioe);
324                            }
325                    }
326    
327                    if (_log.isDebugEnabled()) {
328                            _log.debug(
329                                    "Search found " + hits.getLength() + " results in " +
330                                            hits.getSearchTime() + "ms");
331                    }
332    
333                    return hits;
334            }
335    
336            protected void cleanUp(BoboBrowser boboBrowser) {
337                    if (boboBrowser == null) {
338                            return;
339                    }
340    
341                    try {
342                            boboBrowser.close();
343                    }
344                    catch (IOException ioe) {
345                            _log.error(ioe, ioe);
346                    }
347    
348                    Browsable[] browsables = boboBrowser.getSubBrowsers();
349    
350                    for (Browsable browsable : browsables) {
351                            if (!(browsable instanceof BoboSubBrowser)) {
352                                    continue;
353                            }
354    
355                            BoboSubBrowser boboSubBrowser = (BoboSubBrowser)browsable;
356    
357                            BoboIndexReader boboIndexReader = boboSubBrowser.getIndexReader();
358    
359                            try {
360                                    ThreadLocal<?> threadLocal =
361                                            (ThreadLocal<?>)_runtimeFacetDataMapField.get(
362                                                    boboIndexReader);
363    
364                                    threadLocal.remove();
365    
366                                    _runtimeFacetDataMapField.set(boboIndexReader, null);
367                            }
368                            catch (Exception e) {
369                                    _log.error(
370                                            "Unable to clean up BoboIndexReader#_runtimeFacetDataMap",
371                                            e);
372                            }
373    
374                            try {
375                                    ThreadLocal<?> threadLocal =
376                                            (ThreadLocal<?>)_runtimeFacetHandlerMapField.get(
377                                                    boboIndexReader);
378    
379                                    threadLocal.remove();
380    
381                                    _runtimeFacetHandlerMapField.set(boboIndexReader, null);
382                            }
383                            catch (Exception e) {
384                                    _log.error(
385                                            "Unable to clean up BoboIndexReader#" +
386                                                    "_runtimeFacetHandlerMap",
387                                            e);
388                            }
389                    }
390            }
391    
392            protected DocumentImpl getDocument(
393                    org.apache.lucene.document.Document oldDocument) {
394    
395                    DocumentImpl newDocument = new DocumentImpl();
396    
397                    List<org.apache.lucene.document.Fieldable> oldFieldables =
398                            oldDocument.getFields();
399    
400                    for (org.apache.lucene.document.Fieldable oldFieldable :
401                                    oldFieldables) {
402    
403                            Field newField = null;
404    
405                            String[] values = oldDocument.getValues(oldFieldable.name());
406    
407                            if ((values != null) && (values.length > 1)) {
408                                    newField = new Field(oldFieldable.name(), values);
409                            }
410                            else {
411                                    newField = new Field(
412                                            oldFieldable.name(), oldFieldable.stringValue());
413                            }
414    
415                            newField.setNumeric(oldFieldable instanceof NumericField);
416                            newField.setTokenized(oldFieldable.isTokenized());
417    
418                            newDocument.add(newField);
419                    }
420    
421                    return newDocument;
422            }
423    
424            protected Set<String> getQueryTerms(Query query) {
425                    Set<String> queryTerms = new HashSet<String>();
426    
427                    try {
428                            queryTerms = LuceneHelperUtil.getQueryTerms(
429                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
430                                            query));
431                    }
432                    catch (ParseException pe) {
433                            _log.error("Query " + query, pe);
434                    }
435    
436                    return queryTerms;
437            }
438    
439            protected String getSnippet(
440                            org.apache.lucene.document.Document doc, Query query, String field,
441                            Locale locale, Document hitDoc, Set<String> matchingTerms)
442                    throws IOException {
443    
444                    String snippetField = DocumentImpl.getLocalizedName(locale, field);
445                    String snippet = null;
446    
447                    try {
448                            org.apache.lucene.search.Query luceneQuery =
449                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
450                                            query);
451    
452                            String[] values = doc.getValues(snippetField);
453    
454                            TermCollectingFormatter termCollectingFormatter =
455                                    new TermCollectingFormatter();
456    
457                            if (ArrayUtil.isNotEmpty(values)) {
458                                    snippet = LuceneHelperUtil.getSnippet(
459                                            luceneQuery, snippetField, StringUtil.merge(values),
460                                            termCollectingFormatter);
461                            }
462    
463                            if (ArrayUtil.isEmpty(values) || Validator.isNull(snippet)) {
464                                    snippetField = field;
465    
466                                    values = doc.getValues(snippetField);
467    
468                                    if (ArrayUtil.isEmpty(values)) {
469                                            return StringPool.BLANK;
470                                    }
471    
472                                    snippet = LuceneHelperUtil.getSnippet(
473                                            luceneQuery, field, StringUtil.merge(values),
474                                            termCollectingFormatter);
475                            }
476    
477                            if (Validator.isNull(snippet)) {
478                                    return StringPool.BLANK;
479                            }
480    
481                            matchingTerms.addAll(termCollectingFormatter.getTerms());
482                    }
483                    catch (ParseException pe) {
484                            _log.error("Query " + query, pe);
485                    }
486    
487                    hitDoc.addText(
488                            Field.SNIPPET.concat(StringPool.UNDERLINE).concat(snippetField),
489                            snippet);
490    
491                    return snippet;
492            }
493    
494            protected Hits toHits(
495                            IndexSearcher indexSearcher, BrowseResult browseResult, Query query,
496                            long startTime, float searchTime, int start, int end)
497                    throws IOException, ParseException {
498    
499                    int total = browseResult.getNumHits();
500    
501                    BrowseHit[] browseHits = browseResult.getHits();
502    
503                    if ((start == QueryUtil.ALL_POS) && (end == QueryUtil.ALL_POS)) {
504                            start = 0;
505                            end = total;
506                    }
507    
508                    int[] startAndEnd = SearchPaginationUtil.calculateStartAndEnd(
509                            start, end, total);
510    
511                    start = startAndEnd[0];
512                    end = startAndEnd[1];
513    
514                    Set<String> queryTerms = new HashSet<String>();
515    
516                    IndexReader indexReader = indexSearcher.getIndexReader();
517    
518                    List<String> indexedFieldNames = new ArrayList<String> (
519                            indexReader.getFieldNames(IndexReader.FieldOption.INDEXED));
520    
521                    org.apache.lucene.search.Query luceneQuery =
522                            (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
523                                    query);
524    
525                    int scoredFieldNamesCount = LuceneHelperUtil.countScoredFieldNames(
526                            luceneQuery, ArrayUtil.toStringArray(indexedFieldNames.toArray()));
527    
528                    Hits hits = new HitsImpl();
529    
530                    if ((start < 0) || (start > end)) {
531                            return hits;
532                    }
533    
534                    int subsetTotal = end - start;
535    
536                    if (subsetTotal > PropsValues.INDEX_SEARCH_LIMIT) {
537                            subsetTotal = PropsValues.INDEX_SEARCH_LIMIT;
538                    }
539    
540                    List<Document> subsetDocs = new ArrayList<Document>(subsetTotal);
541                    List<Float> subsetScores = new ArrayList<Float>(subsetTotal);
542    
543                    FieldSelector fieldSelector = null;
544    
545                    QueryConfig queryConfig = query.getQueryConfig();
546    
547                    String[] selectedFieldNames = queryConfig.getSelectedFieldNames();
548    
549                    if (ArrayUtil.isNotEmpty(selectedFieldNames) &&
550                            !selectedFieldNames[0].equals(Field.ANY)) {
551    
552                            fieldSelector = new SetBasedFieldSelector(
553                                    SetUtil.fromArray(selectedFieldNames),
554                                    Collections.<String>emptySet());
555                    }
556    
557                    for (int i = start; i < start + subsetTotal; i++) {
558                            int docId = browseHits[i].getDocid();
559    
560                            org.apache.lucene.document.Document document = indexSearcher.doc(
561                                    docId, fieldSelector);
562    
563                            Document subsetDocument = getDocument(document);
564    
565                            String[] highlightFieldNames = queryConfig.getHighlightFieldNames();
566    
567                            for (String highlightFieldName : highlightFieldNames) {
568                                    getSnippet(
569                                            document, query, highlightFieldName,
570                                            queryConfig.getLocale(), subsetDocument, queryTerms);
571                            }
572    
573                            subsetDocs.add(subsetDocument);
574    
575                            Float subsetScore = browseHits[i].getScore();
576    
577                            if (scoredFieldNamesCount > 0) {
578                                    subsetScore = subsetScore / scoredFieldNamesCount;
579                            }
580    
581                            subsetScores.add(subsetScore);
582    
583                            if (_log.isDebugEnabled()) {
584                                    try {
585                                            Explanation explanation = indexSearcher.explain(
586                                                    luceneQuery, docId);
587    
588                                            _log.debug(explanation.toString());
589                                    }
590                                    catch (Exception e) {
591                                    }
592                            }
593                    }
594    
595                    if (!queryConfig.isHighlightEnabled()) {
596                            queryTerms = getQueryTerms(query);
597                    }
598    
599                    hits.setDocs(subsetDocs.toArray(new Document[subsetDocs.size()]));
600                    hits.setLength(total);
601                    hits.setQuery(query);
602                    hits.setQueryTerms(queryTerms.toArray(new String[queryTerms.size()]));
603                    hits.setScores(ArrayUtil.toFloatArray(subsetScores));
604                    hits.setSearchTime(searchTime);
605                    hits.setStart(startTime);
606    
607                    return hits;
608            }
609    
610            private static Log _log = LogFactoryUtil.getLog(LuceneIndexSearcher.class);
611    
612            private static java.lang.reflect.Field _runtimeFacetDataMapField;
613            private static java.lang.reflect.Field _runtimeFacetHandlerMapField;
614    
615            static {
616                    try {
617                            _runtimeFacetDataMapField = ReflectionUtil.getDeclaredField(
618                                    BoboIndexReader.class, "_runtimeFacetDataMap");
619                            _runtimeFacetHandlerMapField = ReflectionUtil.getDeclaredField(
620                                    BoboIndexReader.class, "_runtimeFacetHandlerMap");
621                    }
622                    catch (Exception e) {
623                            throw new ExceptionInInitializerError(e);
624                    }
625            }
626    
627            private class TermCollectingFormatter implements Formatter {
628    
629                    public Set<String> getTerms() {
630                            return _terms;
631                    }
632    
633                    @Override
634                    public String highlightTerm(
635                            String originalText, TokenGroup tokenGroup) {
636    
637                            if (tokenGroup.getTotalScore() > 0) {
638                                    _terms.add(originalText);
639                            }
640    
641                            return originalText;
642                    }
643    
644                    private Set<String> _terms = new HashSet<String>();
645    
646            }
647    
648    }