001    /**
002     * Copyright (c) 2000-present Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import com.browseengine.bobo.api.BoboBrowser;
018    import com.browseengine.bobo.api.BoboIndexReader;
019    import com.browseengine.bobo.api.BoboSubBrowser;
020    import com.browseengine.bobo.api.Browsable;
021    import com.browseengine.bobo.api.BrowseHit;
022    import com.browseengine.bobo.api.BrowseRequest;
023    import com.browseengine.bobo.api.BrowseResult;
024    import com.browseengine.bobo.api.FacetAccessible;
025    import com.browseengine.bobo.api.FacetSpec;
026    import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
027    import com.browseengine.bobo.facets.FacetHandler;
028    import com.browseengine.bobo.facets.FacetHandler.TermCountSize;
029    import com.browseengine.bobo.facets.impl.MultiValueFacetHandler;
030    import com.browseengine.bobo.facets.impl.RangeFacetHandler;
031    import com.browseengine.bobo.facets.impl.SimpleFacetHandler;
032    
033    import com.liferay.portal.kernel.dao.orm.QueryUtil;
034    import com.liferay.portal.kernel.dao.search.SearchPaginationUtil;
035    import com.liferay.portal.kernel.json.JSONArray;
036    import com.liferay.portal.kernel.json.JSONObject;
037    import com.liferay.portal.kernel.log.Log;
038    import com.liferay.portal.kernel.log.LogFactoryUtil;
039    import com.liferay.portal.kernel.search.BaseIndexSearcher;
040    import com.liferay.portal.kernel.search.Document;
041    import com.liferay.portal.kernel.search.DocumentImpl;
042    import com.liferay.portal.kernel.search.Field;
043    import com.liferay.portal.kernel.search.Hits;
044    import com.liferay.portal.kernel.search.HitsImpl;
045    import com.liferay.portal.kernel.search.ParseException;
046    import com.liferay.portal.kernel.search.Query;
047    import com.liferay.portal.kernel.search.QueryConfig;
048    import com.liferay.portal.kernel.search.QueryTranslatorUtil;
049    import com.liferay.portal.kernel.search.SearchContext;
050    import com.liferay.portal.kernel.search.SearchException;
051    import com.liferay.portal.kernel.search.Sort;
052    import com.liferay.portal.kernel.search.facet.Facet;
053    import com.liferay.portal.kernel.search.facet.MultiValueFacet;
054    import com.liferay.portal.kernel.search.facet.RangeFacet;
055    import com.liferay.portal.kernel.search.facet.SimpleFacet;
056    import com.liferay.portal.kernel.search.facet.collector.FacetCollector;
057    import com.liferay.portal.kernel.search.facet.config.FacetConfiguration;
058    import com.liferay.portal.kernel.util.ArrayUtil;
059    import com.liferay.portal.kernel.util.ReflectionUtil;
060    import com.liferay.portal.kernel.util.SetUtil;
061    import com.liferay.portal.kernel.util.StringPool;
062    import com.liferay.portal.kernel.util.StringUtil;
063    import com.liferay.portal.kernel.util.Time;
064    import com.liferay.portal.kernel.util.Validator;
065    import com.liferay.portal.search.BoboFacetCollector;
066    import com.liferay.portal.util.PropsValues;
067    
068    import java.io.IOException;
069    
070    import java.util.ArrayList;
071    import java.util.Collections;
072    import java.util.HashSet;
073    import java.util.List;
074    import java.util.Locale;
075    import java.util.Map;
076    import java.util.Set;
077    
078    import org.apache.lucene.document.FieldSelector;
079    import org.apache.lucene.document.NumericField;
080    import org.apache.lucene.document.SetBasedFieldSelector;
081    import org.apache.lucene.index.IndexReader;
082    import org.apache.lucene.search.BooleanQuery;
083    import org.apache.lucene.search.Explanation;
084    import org.apache.lucene.search.IndexSearcher;
085    import org.apache.lucene.search.SortField;
086    import org.apache.lucene.search.highlight.Formatter;
087    import org.apache.lucene.search.highlight.TokenGroup;
088    
089    /**
090     * @author Bruno Farache
091     */
092    public class LuceneIndexSearcher extends BaseIndexSearcher {
093    
094            @Override
095            public Hits search(SearchContext searchContext, Query query)
096                    throws SearchException {
097    
098                    if (_log.isDebugEnabled()) {
099                            _log.debug("Query " + query);
100                    }
101    
102                    Hits hits = null;
103    
104                    IndexSearcher indexSearcher = null;
105                    Map<String, Facet> facets = null;
106                    BoboBrowser boboBrowser = null;
107                    BrowseRequest browseRequest = null;
108    
109                    try {
110                            indexSearcher = LuceneHelperUtil.getIndexSearcher(
111                                    searchContext.getCompanyId());
112    
113                            List<FacetHandler<?>> facetHandlers =
114                                    new ArrayList<FacetHandler<?>>();
115    
116                            facets = searchContext.getFacets();
117    
118                            for (Facet facet : facets.values()) {
119                                    if (facet.isStatic()) {
120                                            continue;
121                                    }
122    
123                                    FacetConfiguration facetConfiguration =
124                                            facet.getFacetConfiguration();
125    
126                                    if (facet instanceof MultiValueFacet) {
127                                            MultiValueFacetHandler multiValueFacetHandler =
128                                                    new MultiValueFacetHandler(
129                                                            facetConfiguration.getFieldName(),
130                                                            facetConfiguration.getFieldName());
131    
132                                            JSONObject dataJSONObject = facetConfiguration.getData();
133    
134                                            if (dataJSONObject.has("maxTerms")) {
135                                                    multiValueFacetHandler.setMaxItems(
136                                                            dataJSONObject.getInt("maxTerms"));
137                                            }
138    
139                                            facetHandlers.add(multiValueFacetHandler);
140                                    }
141                                    else if (facet instanceof RangeFacet) {
142                                            List<String> ranges = new ArrayList<String>();
143    
144                                            JSONObject dataJSONObject = facetConfiguration.getData();
145    
146                                            JSONArray rangesJSONArray = dataJSONObject.getJSONArray(
147                                                    "ranges");
148    
149                                            if (rangesJSONArray != null) {
150                                                    for (int i = 0; i < rangesJSONArray.length(); i++) {
151                                                            JSONObject rangeJSONObject =
152                                                                    rangesJSONArray.getJSONObject(i);
153    
154                                                            ranges.add(rangeJSONObject.getString("range"));
155                                                    }
156                                            }
157    
158                                            RangeFacetHandler rangeFacetHandler = new RangeFacetHandler(
159                                                    facetConfiguration.getFieldName(),
160                                                    facetConfiguration.getFieldName(), ranges);
161    
162                                            rangeFacetHandler.setTermCountSize(TermCountSize.large);
163    
164                                            facetHandlers.add(rangeFacetHandler);
165                                    }
166                                    else if (facet instanceof SimpleFacet) {
167                                            SimpleFacetHandler simpleFacetHandler =
168                                                    new SimpleFacetHandler(
169                                                            facetConfiguration.getFieldName(),
170                                                            facetConfiguration.getFieldName());
171    
172                                            facetHandlers.add(simpleFacetHandler);
173                                    }
174                            }
175    
176                            BoboIndexReader boboIndexReader = BoboIndexReader.getInstance(
177                                    indexSearcher.getIndexReader(), facetHandlers);
178    
179                            SortField[] sortFields = new SortField[0];
180    
181                            Sort[] sorts = searchContext.getSorts();
182    
183                            if (sorts != null) {
184                                    sortFields = new SortField[sorts.length];
185    
186                                    for (int i = 0; i < sorts.length; i++) {
187                                            Sort sort = sorts[i];
188    
189                                            if ((sort.getType() == Sort.STRING_TYPE) &&
190                                                    (searchContext.getLocale() != null)) {
191    
192                                                    sortFields[i] = new SortField(
193                                                            sort.getFieldName(), searchContext.getLocale(),
194                                                            sort.isReverse());
195                                            }
196                                            else {
197                                                    sortFields[i] = new SortField(
198                                                            sort.getFieldName(), sort.getType(),
199                                                            sort.isReverse());
200                                            }
201                                    }
202                            }
203    
204                            browseRequest = new BrowseRequest();
205    
206                            for (Facet facet : facets.values()) {
207                                    if (facet.isStatic()) {
208                                            continue;
209                                    }
210    
211                                    FacetConfiguration facetConfiguration =
212                                            facet.getFacetConfiguration();
213    
214                                    FacetSpec facetSpec = new FacetSpec();
215    
216                                    facetSpec.setOrderBy(
217                                            FacetSortSpec.valueOf(facetConfiguration.getOrder()));
218    
219                                    browseRequest.setFacetSpec(facet.getFieldName(), facetSpec);
220                            }
221    
222                            int end = searchContext.getEnd();
223    
224                            if ((end == QueryUtil.ALL_POS) ||
225                                    (end > PropsValues.INDEX_SEARCH_LIMIT)) {
226    
227                                    end = PropsValues.INDEX_SEARCH_LIMIT;
228                            }
229    
230                            browseRequest.setCount(end);
231    
232                            browseRequest.setOffset(0);
233                            browseRequest.setQuery(
234                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
235                                            query));
236                            browseRequest.setSort(sortFields);
237    
238                            boboBrowser = new BoboBrowser(boboIndexReader);
239    
240                            long startTime = System.currentTimeMillis();
241    
242                            BrowseResult browseResult = boboBrowser.browse(browseRequest);
243    
244                            long endTime = System.currentTimeMillis();
245    
246                            float searchTime = (float)(endTime - startTime) / Time.SECOND;
247    
248                            hits = toHits(
249                                    indexSearcher, browseResult, query, startTime, searchTime,
250                                    searchContext.getStart(), searchContext.getEnd());
251    
252                            Map<String, FacetAccessible> facetMap = browseResult.getFacetMap();
253    
254                            for (Map.Entry<String, FacetAccessible> entry :
255                                            facetMap.entrySet()) {
256    
257                                    Facet facet = facets.get(entry.getKey());
258    
259                                    FacetAccessible facetAccessible = entry.getValue();
260    
261                                    FacetCollector facetCollector = new BoboFacetCollector(
262                                            entry.getKey(), facetAccessible);
263    
264                                    facet.setFacetCollector(facetCollector);
265                            }
266                    }
267                    catch (BooleanQuery.TooManyClauses tmc) {
268                            int maxClauseCount = BooleanQuery.getMaxClauseCount();
269    
270                            BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
271    
272                            try {
273                                    long startTime = System.currentTimeMillis();
274    
275                                    BrowseResult browseResult = boboBrowser.browse(browseRequest);
276    
277                                    long endTime = System.currentTimeMillis();
278    
279                                    float searchTime = (float)(endTime - startTime) / Time.SECOND;
280    
281                                    hits = toHits(
282                                            indexSearcher, browseResult, query, startTime, searchTime,
283                                            searchContext.getStart(), searchContext.getEnd());
284    
285                                    Map<String, FacetAccessible> facetMap =
286                                            browseResult.getFacetMap();
287    
288                                    for (Map.Entry<String, FacetAccessible> entry :
289                                                    facetMap.entrySet()) {
290    
291                                            Facet facet = facets.get(entry.getKey());
292    
293                                            FacetAccessible facetAccessible = entry.getValue();
294    
295                                            FacetCollector facetCollector = new BoboFacetCollector(
296                                                    entry.getKey(), facetAccessible);
297    
298                                            facet.setFacetCollector(facetCollector);
299                                    }
300                            }
301                            catch (Exception e) {
302                                    throw new SearchException(e);
303                            }
304                            finally {
305                                    BooleanQuery.setMaxClauseCount(maxClauseCount);
306                            }
307                    }
308                    catch (ParseException pe) {
309                            _log.error("Query " + query, pe);
310    
311                            return new HitsImpl();
312                    }
313                    catch (Exception e) {
314                            throw new SearchException(e);
315                    }
316                    finally {
317                            cleanUp(boboBrowser);
318    
319                            try {
320                                    LuceneHelperUtil.releaseIndexSearcher(
321                                            searchContext.getCompanyId(), indexSearcher);
322                            }
323                            catch (IOException ioe) {
324                                    _log.error("Unable to release searcher", ioe);
325                            }
326                    }
327    
328                    if (_log.isDebugEnabled()) {
329                            _log.debug(
330                                    "Search found " + hits.getLength() + " results in " +
331                                            hits.getSearchTime() + "ms");
332                    }
333    
334                    return hits;
335            }
336    
337            protected void cleanUp(BoboBrowser boboBrowser) {
338                    if (boboBrowser == null) {
339                            return;
340                    }
341    
342                    try {
343                            boboBrowser.close();
344                    }
345                    catch (IOException ioe) {
346                            _log.error(ioe, ioe);
347                    }
348    
349                    Browsable[] browsables = boboBrowser.getSubBrowsers();
350    
351                    for (Browsable browsable : browsables) {
352                            if (!(browsable instanceof BoboSubBrowser)) {
353                                    continue;
354                            }
355    
356                            BoboSubBrowser boboSubBrowser = (BoboSubBrowser)browsable;
357    
358                            BoboIndexReader boboIndexReader = boboSubBrowser.getIndexReader();
359    
360                            try {
361                                    ThreadLocal<?> threadLocal =
362                                            (ThreadLocal<?>)_runtimeFacetDataMapField.get(
363                                                    boboIndexReader);
364    
365                                    threadLocal.remove();
366    
367                                    _runtimeFacetDataMapField.set(boboIndexReader, null);
368                            }
369                            catch (Exception e) {
370                                    _log.error(
371                                            "Unable to clean up BoboIndexReader#_runtimeFacetDataMap",
372                                            e);
373                            }
374    
375                            try {
376                                    ThreadLocal<?> threadLocal =
377                                            (ThreadLocal<?>)_runtimeFacetHandlerMapField.get(
378                                                    boboIndexReader);
379    
380                                    threadLocal.remove();
381    
382                                    _runtimeFacetHandlerMapField.set(boboIndexReader, null);
383                            }
384                            catch (Exception e) {
385                                    _log.error(
386                                            "Unable to clean up BoboIndexReader#" +
387                                                    "_runtimeFacetHandlerMap",
388                                            e);
389                            }
390                    }
391            }
392    
393            protected DocumentImpl getDocument(
394                    org.apache.lucene.document.Document oldDocument) {
395    
396                    DocumentImpl newDocument = new DocumentImpl();
397    
398                    List<org.apache.lucene.document.Fieldable> oldFieldables =
399                            oldDocument.getFields();
400    
401                    for (org.apache.lucene.document.Fieldable oldFieldable :
402                                    oldFieldables) {
403    
404                            Field newField = null;
405    
406                            String[] values = oldDocument.getValues(oldFieldable.name());
407    
408                            if ((values != null) && (values.length > 1)) {
409                                    newField = new Field(oldFieldable.name(), values);
410                            }
411                            else {
412                                    newField = new Field(
413                                            oldFieldable.name(), oldFieldable.stringValue());
414                            }
415    
416                            newField.setNumeric(oldFieldable instanceof NumericField);
417                            newField.setTokenized(oldFieldable.isTokenized());
418    
419                            newDocument.add(newField);
420                    }
421    
422                    return newDocument;
423            }
424    
425            protected Set<String> getQueryTerms(Query query) {
426                    Set<String> queryTerms = new HashSet<String>();
427    
428                    try {
429                            queryTerms = LuceneHelperUtil.getQueryTerms(
430                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
431                                            query));
432                    }
433                    catch (ParseException pe) {
434                            _log.error("Query " + query, pe);
435                    }
436    
437                    return queryTerms;
438            }
439    
440            protected String getSnippet(
441                            org.apache.lucene.document.Document doc, Query query, String field,
442                            Locale locale, Document hitDoc, Set<String> matchingTerms)
443                    throws IOException {
444    
445                    String snippetField = DocumentImpl.getLocalizedName(locale, field);
446                    String snippet = null;
447    
448                    try {
449                            org.apache.lucene.search.Query luceneQuery =
450                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
451                                            query);
452    
453                            String[] values = doc.getValues(snippetField);
454    
455                            TermCollectingFormatter termCollectingFormatter =
456                                    new TermCollectingFormatter();
457    
458                            if (ArrayUtil.isNotEmpty(values)) {
459                                    snippet = LuceneHelperUtil.getSnippet(
460                                            luceneQuery, snippetField, StringUtil.merge(values),
461                                            termCollectingFormatter);
462                            }
463    
464                            if (ArrayUtil.isEmpty(values) || Validator.isNull(snippet)) {
465                                    snippetField = field;
466    
467                                    values = doc.getValues(snippetField);
468    
469                                    if (ArrayUtil.isEmpty(values)) {
470                                            return StringPool.BLANK;
471                                    }
472    
473                                    snippet = LuceneHelperUtil.getSnippet(
474                                            luceneQuery, field, StringUtil.merge(values),
475                                            termCollectingFormatter);
476                            }
477    
478                            if (Validator.isNull(snippet)) {
479                                    return StringPool.BLANK;
480                            }
481    
482                            matchingTerms.addAll(termCollectingFormatter.getTerms());
483                    }
484                    catch (ParseException pe) {
485                            _log.error("Query " + query, pe);
486                    }
487    
488                    hitDoc.addText(
489                            Field.SNIPPET.concat(StringPool.UNDERLINE).concat(snippetField),
490                            snippet);
491    
492                    return snippet;
493            }
494    
495            protected Hits toHits(
496                            IndexSearcher indexSearcher, BrowseResult browseResult, Query query,
497                            long startTime, float searchTime, int start, int end)
498                    throws IOException, ParseException {
499    
500                    int total = browseResult.getNumHits();
501    
502                    BrowseHit[] browseHits = browseResult.getHits();
503    
504                    if ((start == QueryUtil.ALL_POS) && (end == QueryUtil.ALL_POS)) {
505                            start = 0;
506                            end = total;
507                    }
508    
509                    int[] startAndEnd = SearchPaginationUtil.calculateStartAndEnd(
510                            start, end, total);
511    
512                    start = startAndEnd[0];
513                    end = startAndEnd[1];
514    
515                    Set<String> queryTerms = new HashSet<String>();
516    
517                    IndexReader indexReader = indexSearcher.getIndexReader();
518    
519                    List<String> indexedFieldNames = new ArrayList<String> (
520                            indexReader.getFieldNames(IndexReader.FieldOption.INDEXED));
521    
522                    org.apache.lucene.search.Query luceneQuery =
523                            (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
524                                    query);
525    
526                    int scoredFieldNamesCount = LuceneHelperUtil.countScoredFieldNames(
527                            luceneQuery, ArrayUtil.toStringArray(indexedFieldNames.toArray()));
528    
529                    Hits hits = new HitsImpl();
530    
531                    if ((start < 0) || (start > end)) {
532                            return hits;
533                    }
534    
535                    int subsetTotal = end - start;
536    
537                    if (subsetTotal > PropsValues.INDEX_SEARCH_LIMIT) {
538                            subsetTotal = PropsValues.INDEX_SEARCH_LIMIT;
539                    }
540    
541                    List<Document> subsetDocs = new ArrayList<Document>(subsetTotal);
542                    List<Float> subsetScores = new ArrayList<Float>(subsetTotal);
543    
544                    FieldSelector fieldSelector = null;
545    
546                    QueryConfig queryConfig = query.getQueryConfig();
547    
548                    String[] selectedFieldNames = queryConfig.getSelectedFieldNames();
549    
550                    if (ArrayUtil.isNotEmpty(selectedFieldNames) &&
551                            !selectedFieldNames[0].equals(Field.ANY)) {
552    
553                            fieldSelector = new SetBasedFieldSelector(
554                                    SetUtil.fromArray(selectedFieldNames),
555                                    Collections.<String>emptySet());
556                    }
557    
558                    for (int i = start; i < start + subsetTotal; i++) {
559                            int docId = browseHits[i].getDocid();
560    
561                            org.apache.lucene.document.Document document = indexSearcher.doc(
562                                    docId, fieldSelector);
563    
564                            Document subsetDocument = getDocument(document);
565    
566                            String[] highlightFieldNames = queryConfig.getHighlightFieldNames();
567    
568                            for (String highlightFieldName : highlightFieldNames) {
569                                    getSnippet(
570                                            document, query, highlightFieldName,
571                                            queryConfig.getLocale(), subsetDocument, queryTerms);
572                            }
573    
574                            subsetDocs.add(subsetDocument);
575    
576                            Float subsetScore = browseHits[i].getScore();
577    
578                            if (scoredFieldNamesCount > 0) {
579                                    subsetScore = subsetScore / scoredFieldNamesCount;
580                            }
581    
582                            subsetScores.add(subsetScore);
583    
584                            if (_log.isDebugEnabled()) {
585                                    try {
586                                            Explanation explanation = indexSearcher.explain(
587                                                    luceneQuery, docId);
588    
589                                            _log.debug(explanation.toString());
590                                    }
591                                    catch (Exception e) {
592                                    }
593                            }
594                    }
595    
596                    if (!queryConfig.isHighlightEnabled()) {
597                            queryTerms = getQueryTerms(query);
598                    }
599    
600                    hits.setDocs(subsetDocs.toArray(new Document[subsetDocs.size()]));
601                    hits.setLength(total);
602                    hits.setQuery(query);
603                    hits.setQueryTerms(queryTerms.toArray(new String[queryTerms.size()]));
604                    hits.setScores(ArrayUtil.toFloatArray(subsetScores));
605                    hits.setSearchTime(searchTime);
606                    hits.setStart(startTime);
607    
608                    return hits;
609            }
610    
611            private static Log _log = LogFactoryUtil.getLog(LuceneIndexSearcher.class);
612    
613            private static java.lang.reflect.Field _runtimeFacetDataMapField;
614            private static java.lang.reflect.Field _runtimeFacetHandlerMapField;
615    
616            static {
617                    try {
618                            _runtimeFacetDataMapField = ReflectionUtil.getDeclaredField(
619                                    BoboIndexReader.class, "_runtimeFacetDataMap");
620                            _runtimeFacetHandlerMapField = ReflectionUtil.getDeclaredField(
621                                    BoboIndexReader.class, "_runtimeFacetHandlerMap");
622                    }
623                    catch (Exception e) {
624                            throw new ExceptionInInitializerError(e);
625                    }
626            }
627    
628            private class TermCollectingFormatter implements Formatter {
629    
630                    public Set<String> getTerms() {
631                            return _terms;
632                    }
633    
634                    @Override
635                    public String highlightTerm(
636                            String originalText, TokenGroup tokenGroup) {
637    
638                            if (tokenGroup.getTotalScore() > 0) {
639                                    _terms.add(originalText);
640                            }
641    
642                            return originalText;
643                    }
644    
645                    private Set<String> _terms = new HashSet<String>();
646    
647            }
648    
649    }