001    /**
002     * Copyright (c) 2000-2012 Liferay, Inc. All rights reserved.
003     *
004     * The contents of this file are subject to the terms of the Liferay Enterprise
005     * Subscription License ("License"). You may not use this file except in
006     * compliance with the License. You can obtain a copy of the License by
007     * contacting Liferay, Inc. See the License for the specific language governing
008     * permissions and limitations under the License, including but not limited to
009     * distribution rights of the Software.
010     *
011     *
012     *
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import com.browseengine.bobo.api.BoboBrowser;
018    import com.browseengine.bobo.api.BoboIndexReader;
019    import com.browseengine.bobo.api.Browsable;
020    import com.browseengine.bobo.api.BrowseHit;
021    import com.browseengine.bobo.api.BrowseRequest;
022    import com.browseengine.bobo.api.BrowseResult;
023    import com.browseengine.bobo.api.FacetAccessible;
024    import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
025    import com.browseengine.bobo.api.FacetSpec;
026    import com.browseengine.bobo.facets.FacetHandler.TermCountSize;
027    import com.browseengine.bobo.facets.FacetHandler;
028    import com.browseengine.bobo.facets.impl.MultiValueFacetHandler;
029    import com.browseengine.bobo.facets.impl.RangeFacetHandler;
030    import com.browseengine.bobo.facets.impl.SimpleFacetHandler;
031    
032    import com.liferay.portal.kernel.dao.orm.QueryUtil;
033    import com.liferay.portal.kernel.json.JSONArray;
034    import com.liferay.portal.kernel.json.JSONObject;
035    import com.liferay.portal.kernel.log.Log;
036    import com.liferay.portal.kernel.log.LogFactoryUtil;
037    import com.liferay.portal.kernel.search.Document;
038    import com.liferay.portal.kernel.search.DocumentImpl;
039    import com.liferay.portal.kernel.search.Field;
040    import com.liferay.portal.kernel.search.Hits;
041    import com.liferay.portal.kernel.search.HitsImpl;
042    import com.liferay.portal.kernel.search.IndexSearcher;
043    import com.liferay.portal.kernel.search.ParseException;
044    import com.liferay.portal.kernel.search.Query;
045    import com.liferay.portal.kernel.search.QueryConfig;
046    import com.liferay.portal.kernel.search.QueryTranslatorUtil;
047    import com.liferay.portal.kernel.search.SearchContext;
048    import com.liferay.portal.kernel.search.SearchException;
049    import com.liferay.portal.kernel.search.Sort;
050    import com.liferay.portal.kernel.search.facet.Facet;
051    import com.liferay.portal.kernel.search.facet.MultiValueFacet;
052    import com.liferay.portal.kernel.search.facet.RangeFacet;
053    import com.liferay.portal.kernel.search.facet.SimpleFacet;
054    import com.liferay.portal.kernel.search.facet.collector.FacetCollector;
055    import com.liferay.portal.kernel.search.facet.config.FacetConfiguration;
056    import com.liferay.portal.kernel.util.ArrayUtil;
057    import com.liferay.portal.kernel.util.StringPool;
058    import com.liferay.portal.kernel.util.StringUtil;
059    import com.liferay.portal.kernel.util.Time;
060    import com.liferay.portal.kernel.util.Validator;
061    import com.liferay.portal.search.BoboFacetCollector;
062    import com.liferay.portal.util.PropsValues;
063    
064    import java.io.IOException;
065    
066    import java.util.ArrayList;
067    import java.util.List;
068    import java.util.Locale;
069    import java.util.Map;
070    
071    import org.apache.lucene.document.NumericField;
072    import org.apache.lucene.index.IndexReader;
073    import org.apache.lucene.search.BooleanQuery;
074    import org.apache.lucene.search.Explanation;
075    import org.apache.lucene.search.ScoreDoc;
076    import org.apache.lucene.search.SortField;
077    import org.apache.lucene.search.TopFieldDocs;
078    
079    /**
080     * @author Bruno Farache
081     */
082    public class LuceneIndexSearcherImpl implements IndexSearcher {
083    
084            public Hits search(SearchContext searchContext, Query query)
085                    throws SearchException {
086    
087                    if (_log.isDebugEnabled()) {
088                            _log.debug("Query " + query);
089                    }
090    
091                    Hits hits = null;
092    
093                    org.apache.lucene.search.IndexSearcher indexSearcher = null;
094                    Map<String, Facet> facets = null;
095                    BrowseRequest browseRequest = null;
096                    Browsable browsable = null;
097    
098                    try {
099                            indexSearcher = LuceneHelperUtil.getSearcher(
100                                    searchContext.getCompanyId(), true);
101    
102                            List<FacetHandler<?>> facetHandlers =
103                                    new ArrayList<FacetHandler<?>>();
104    
105                            facets = searchContext.getFacets();
106    
107                            for (Facet facet : facets.values()) {
108                                    if (facet.isStatic()) {
109                                            continue;
110                                    }
111    
112                                    FacetConfiguration facetConfiguration =
113                                            facet.getFacetConfiguration();
114    
115                                    if (facet instanceof MultiValueFacet) {
116                                            MultiValueFacetHandler multiValueFacetHandler =
117                                                    new MultiValueFacetHandler(
118                                                            facetConfiguration.getFieldName(),
119                                                            facetConfiguration.getFieldName());
120    
121                                            JSONObject dataJSONObject = facetConfiguration.getData();
122    
123                                            if (dataJSONObject.has("maxTerms")) {
124                                                    multiValueFacetHandler.setMaxItems(
125                                                            dataJSONObject.getInt("maxTerms"));
126                                            }
127    
128                                            facetHandlers.add(multiValueFacetHandler);
129                                    }
130                                    else if (facet instanceof RangeFacet) {
131                                            List<String> ranges = new ArrayList<String>();
132    
133                                            JSONObject dataJSONObject = facetConfiguration.getData();
134    
135                                            JSONArray rangesJSONArray = dataJSONObject.getJSONArray(
136                                                    "ranges");
137    
138                                            if (rangesJSONArray != null) {
139                                                    for (int i = 0; i < rangesJSONArray.length(); i++) {
140                                                            JSONObject rangeJSONObject =
141                                                                    rangesJSONArray.getJSONObject(i);
142    
143                                                            ranges.add(rangeJSONObject.getString("range"));
144                                                    }
145                                            }
146    
147                                            RangeFacetHandler rangeFacetHandler = new RangeFacetHandler(
148                                                    facetConfiguration.getFieldName(),
149                                                    facetConfiguration.getFieldName(), ranges);
150    
151                                            rangeFacetHandler.setTermCountSize(TermCountSize.large);
152    
153                                            facetHandlers.add(rangeFacetHandler);
154                                    }
155                                    else if (facet instanceof SimpleFacet) {
156                                            SimpleFacetHandler simpleFacetHandler =
157                                                    new SimpleFacetHandler(
158                                                            facetConfiguration.getFieldName(),
159                                                            facetConfiguration.getFieldName());
160    
161                                            facetHandlers.add(simpleFacetHandler);
162                                    }
163                            }
164    
165                            BoboIndexReader boboIndexReader = BoboIndexReader.getInstance(
166                                    indexSearcher.getIndexReader(), facetHandlers);
167    
168                            SortField[] sortFields = new SortField[0];
169    
170                            Sort[] sorts = searchContext.getSorts();
171    
172                            if (sorts != null) {
173                                    sortFields = new SortField[sorts.length];
174    
175                                    for (int i = 0; i < sorts.length; i++) {
176                                            Sort sort = sorts[i];
177    
178                                            sortFields[i] = new SortField(
179                                                    sort.getFieldName(), sort.getType(), sort.isReverse());
180                                    }
181                            }
182    
183                            browseRequest = new BrowseRequest();
184    
185                            for (Facet facet : facets.values()) {
186                                    if (facet.isStatic()) {
187                                            continue;
188                                    }
189    
190                                    FacetConfiguration facetConfiguration =
191                                            facet.getFacetConfiguration();
192    
193                                    FacetSpec facetSpec = new FacetSpec();
194    
195                                    facetSpec.setOrderBy(
196                                            FacetSortSpec.valueOf(facetConfiguration.getOrder()));
197    
198                                    browseRequest.setFacetSpec(facet.getFieldName(), facetSpec);
199                            }
200    
201                            browseRequest.setCount(PropsValues.INDEX_SEARCH_LIMIT);
202                            browseRequest.setOffset(0);
203                            browseRequest.setQuery(
204                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
205                                            query));
206                            browseRequest.setSort(sortFields);
207    
208                            browsable = new BoboBrowser(boboIndexReader);
209    
210                            long startTime = System.currentTimeMillis();
211    
212                            BrowseResult browseResult = browsable.browse(browseRequest);
213    
214                            BrowseHit[] browseHits = browseResult.getHits();
215    
216                            long endTime = System.currentTimeMillis();
217    
218                            float searchTime = (float)(endTime - startTime) / Time.SECOND;
219    
220                            hits = toHits(
221                                    indexSearcher, new HitDocs(browseHits), query, startTime,
222                                    searchTime, searchContext.getStart(), searchContext.getEnd());
223    
224                            Map<String, FacetAccessible> facetMap = browseResult.getFacetMap();
225    
226                            for (Map.Entry<String, FacetAccessible> entry :
227                                            facetMap.entrySet()) {
228    
229                                    Facet facet = facets.get(entry.getKey());
230    
231                                    FacetAccessible facetAccessible = entry.getValue();
232    
233                                    FacetCollector facetCollector = new BoboFacetCollector(
234                                            entry.getKey(), facetAccessible);
235    
236                                    facet.setFacetCollector(facetCollector);
237                            }
238                    }
239                    catch (BooleanQuery.TooManyClauses tmc) {
240                            int maxClauseCount = BooleanQuery.getMaxClauseCount();
241    
242                            BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
243    
244                            try {
245                                    long startTime = System.currentTimeMillis();
246    
247                                    BrowseResult result = browsable.browse(browseRequest);
248    
249                                    BrowseHit[] browseHits = result.getHits();
250    
251                                    long endTime = System.currentTimeMillis();
252    
253                                    float searchTime = (float)(endTime - startTime) / Time.SECOND;
254    
255                                    hits = toHits(
256                                            indexSearcher, new HitDocs(browseHits), query, startTime,
257                                            searchTime, searchContext.getStart(),
258                                            searchContext.getEnd());
259    
260                                    Map<String, FacetAccessible> facetMap = result.getFacetMap();
261    
262                                    for (Map.Entry<String, FacetAccessible> entry :
263                                                    facetMap.entrySet()) {
264    
265                                            Facet facet = facets.get(entry.getKey());
266    
267                                            FacetAccessible facetAccessible = entry.getValue();
268    
269                                            FacetCollector facetCollector = new BoboFacetCollector(
270                                                    entry.getKey(), facetAccessible);
271    
272                                            facet.setFacetCollector(facetCollector);
273                                    }
274                            }
275                            catch (Exception e) {
276                                    throw new SearchException(e);
277                            }
278                            finally {
279                                    BooleanQuery.setMaxClauseCount(maxClauseCount);
280                            }
281                    }
282                    catch (ParseException pe) {
283                            _log.error("Query " + query, pe);
284    
285                            return new HitsImpl();
286                    }
287                    catch (Exception e) {
288                            throw new SearchException(e);
289                    }
290                    finally {
291                            if (browsable != null) {
292                                    try {
293                                            browsable.close();
294                                    }
295                                    catch (IOException ioe) {
296                                            _log.error(ioe, ioe);
297                                    }
298                            }
299    
300                            if (indexSearcher != null) {
301                                    try {
302                                            indexSearcher.close();
303                                    }
304                                    catch (IOException ioe) {
305                                            _log.error(ioe, ioe);
306                                    }
307                            }
308                    }
309    
310                    if (_log.isDebugEnabled()) {
311                            _log.debug(
312                                    "Search found " + hits.getLength() + " results in " +
313                                            hits.getSearchTime() + "ms");
314                    }
315    
316                    return hits;
317            }
318    
319            public Hits search(
320                            String searchEngineId, long companyId, Query query, Sort[] sorts,
321                            int start, int end)
322                    throws SearchException {
323    
324                    if (_log.isDebugEnabled()) {
325                            _log.debug("Query " + query);
326                    }
327    
328                    Hits hits = null;
329    
330                    org.apache.lucene.search.IndexSearcher indexSearcher = null;
331                    org.apache.lucene.search.Sort luceneSort = null;
332    
333                    try {
334                            indexSearcher = LuceneHelperUtil.getSearcher(companyId, true);
335    
336                            if (sorts != null) {
337                                    SortField[] sortFields = new SortField[sorts.length];
338    
339                                    for (int i = 0; i < sorts.length; i++) {
340                                            Sort sort = sorts[i];
341    
342                                            sortFields[i] = new SortField(
343                                                    sort.getFieldName(), sort.getType(), sort.isReverse());
344                                    }
345    
346                                    luceneSort = new org.apache.lucene.search.Sort(sortFields);
347                            }
348                            else {
349                                    luceneSort = new org.apache.lucene.search.Sort();
350                            }
351    
352                            long startTime = System.currentTimeMillis();
353    
354                            TopFieldDocs topFieldDocs = indexSearcher.search(
355                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
356                                            query),
357                                    null, PropsValues.INDEX_SEARCH_LIMIT, luceneSort);
358    
359                            long endTime = System.currentTimeMillis();
360    
361                            float searchTime = (float)(endTime - startTime) / Time.SECOND;
362    
363                            hits = toHits(
364                                    indexSearcher, new HitDocs(topFieldDocs), query, startTime,
365                                    searchTime, start, end);
366                    }
367                    catch (BooleanQuery.TooManyClauses tmc) {
368                            int maxClauseCount = BooleanQuery.getMaxClauseCount();
369    
370                            BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
371    
372                            try {
373                                    long startTime = System.currentTimeMillis();
374    
375                                    TopFieldDocs topFieldDocs = indexSearcher.search(
376                                            (org.apache.lucene.search.Query)
377                                                    QueryTranslatorUtil.translate(query),
378                                            null, PropsValues.INDEX_SEARCH_LIMIT, luceneSort);
379    
380                                    long endTime = System.currentTimeMillis();
381    
382                                    float searchTime = (float)(endTime - startTime) / Time.SECOND;
383    
384                                    hits = toHits(
385                                            indexSearcher, new HitDocs(topFieldDocs), query, startTime,
386                                            searchTime, start, end);
387                            }
388                            catch (Exception e) {
389                                    throw new SearchException(e);
390                            }
391                            finally {
392                                    BooleanQuery.setMaxClauseCount(maxClauseCount);
393                            }
394                    }
395                    catch (ParseException pe) {
396                            _log.error("Query " + query, pe);
397    
398                            return new HitsImpl();
399                    }
400                    catch (Exception e) {
401                            throw new SearchException(e);
402                    }
403                    finally {
404                            if (indexSearcher != null) {
405                                    try {
406                                            indexSearcher.close();
407                                    }
408                                    catch (IOException ioe) {
409                                            _log.error(ioe, ioe);
410                                    }
411                            }
412                    }
413    
414                    if (_log.isDebugEnabled()) {
415                            _log.debug(
416                                    "Search found " + hits.getLength() + " results in " +
417                                            hits.getSearchTime() + "ms");
418                    }
419    
420                    return hits;
421            }
422    
423            protected DocumentImpl getDocument(
424                    org.apache.lucene.document.Document oldDocument) {
425    
426                    DocumentImpl newDocument = new DocumentImpl();
427    
428                    List<org.apache.lucene.document.Fieldable> oldFieldables =
429                            oldDocument.getFields();
430    
431                    for (org.apache.lucene.document.Fieldable oldFieldable :
432                                    oldFieldables) {
433    
434                            Field newField = null;
435    
436                            String[] values = oldDocument.getValues(oldFieldable.name());
437    
438                            if ((values != null) && (values.length > 1)) {
439                                    newField = new Field(oldFieldable.name(), values);
440                            }
441                            else {
442                                    newField = new Field(
443                                            oldFieldable.name(), oldFieldable.stringValue());
444                            }
445    
446                            newField.setNumeric(oldFieldable instanceof NumericField);
447                            newField.setTokenized(oldFieldable.isTokenized());
448    
449                            newDocument.add(newField);
450                    }
451    
452                    return newDocument;
453            }
454    
455            protected String[] getQueryTerms(Query query) {
456                    String[] queryTerms = new String[0];
457    
458                    try {
459                            queryTerms = LuceneHelperUtil.getQueryTerms(
460                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
461                                            query));
462                    }
463                    catch (ParseException pe) {
464                            _log.error("Query " + query, pe);
465                    }
466    
467                    return queryTerms;
468            }
469    
470            protected String getSnippet(
471                            org.apache.lucene.document.Document doc, Query query, String field,
472                            Locale locale)
473                    throws IOException {
474    
475                    String localizedName = DocumentImpl.getLocalizedName(locale, field);
476    
477                    String[] values = doc.getValues(localizedName);
478    
479                    if ((values == null) || (values.length == 0)) {
480                            values = doc.getValues(field);
481                    }
482    
483                    String snippet = null;
484    
485                    if (Validator.isNull(values)) {
486                            return snippet;
487                    }
488    
489                    String s = StringUtil.merge(values);
490    
491                    try {
492                            snippet = LuceneHelperUtil.getSnippet(
493                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
494                                            query),
495                                    field, s);
496                    }
497                    catch (ParseException pe) {
498                            _log.error("Query " + query, pe);
499                    }
500    
501                    return snippet;
502            }
503    
504            protected Hits toHits(
505                            org.apache.lucene.search.IndexSearcher indexSearcher,
506                            HitDocs hitDocs, Query query, long startTime, float searchTime,
507                            int start, int end)
508                    throws IOException, ParseException {
509    
510                    int length = hitDocs.getTotalHits();
511    
512                    if ((start == QueryUtil.ALL_POS) && (end == QueryUtil.ALL_POS)) {
513                            start = 0;
514                            end = length;
515                    }
516    
517                    String[] queryTerms = getQueryTerms(query);
518    
519                    IndexReader indexReader = indexSearcher.getIndexReader();
520    
521                    List<String> indexedFieldNames = new ArrayList<String> (
522                            indexReader.getFieldNames(IndexReader.FieldOption.INDEXED));
523    
524                    org.apache.lucene.search.Query luceneQuery =
525                            (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
526                                    query);
527    
528                    int scoredFieldNamesCount = LuceneHelperUtil.countScoredFieldNames(
529                            luceneQuery, ArrayUtil.toStringArray(indexedFieldNames.toArray()));
530    
531                    Hits hits = new HitsImpl();
532    
533                    if ((start > -1) && (start <= end)) {
534                            if (end > length) {
535                                    end = length;
536                            }
537    
538                            if (start > end) {
539                                    start = end;
540                            }
541    
542                            int subsetTotal = end - start;
543    
544                            if (subsetTotal > PropsValues.INDEX_SEARCH_LIMIT) {
545                                    subsetTotal = PropsValues.INDEX_SEARCH_LIMIT;
546                            }
547    
548                            List<Document> subsetDocs = new ArrayList<Document>(subsetTotal);
549                            List<String> subsetSnippets = new ArrayList<String>(subsetTotal);
550                            List<Float> subsetScores = new ArrayList<Float>(subsetTotal);
551    
552                            QueryConfig queryConfig = query.getQueryConfig();
553    
554                            for (int i = start; i < end; i++) {
555                                    if (i >= PropsValues.INDEX_SEARCH_LIMIT) {
556                                            break;
557                                    }
558    
559                                    int docId = hitDocs.getDocId(i);
560    
561                                    org.apache.lucene.document.Document document =
562                                            indexSearcher.doc(docId);
563    
564                                    Document subsetDocument = getDocument(document);
565    
566                                    String subsetSnippet = StringPool.BLANK;
567    
568                                    if (queryConfig.isHighlightEnabled()) {
569                                            subsetSnippet = getSnippet(
570                                                    document, query, Field.CONTENT,
571                                                    queryConfig.getLocale());
572                                    }
573    
574                                    subsetDocument.addText(Field.SNIPPET, subsetSnippet);
575    
576                                    subsetSnippets.add(subsetSnippet);
577    
578                                    subsetDocs.add(subsetDocument);
579    
580                                    Float subsetScore = hitDocs.getScore(i);
581    
582                                    if (scoredFieldNamesCount > 0) {
583                                            subsetScore = subsetScore / scoredFieldNamesCount;
584                                    }
585    
586                                    subsetScores.add(subsetScore);
587    
588                                    if (_log.isDebugEnabled()) {
589                                            try {
590                                                    Explanation explanation = indexSearcher.explain(
591                                                            luceneQuery, docId);
592    
593                                                    _log.debug(explanation.toString());
594                                            }
595                                            catch (Exception e) {
596                                            }
597                                    }
598                            }
599    
600                            hits.setStart(startTime);
601                            hits.setSearchTime(searchTime);
602                            hits.setQuery(query);
603                            hits.setQueryTerms(queryTerms);
604                            hits.setDocs(subsetDocs.toArray(new Document[subsetDocs.size()]));
605                            hits.setLength(length);
606                            hits.setSnippets(
607                                    subsetSnippets.toArray(new String[subsetSnippets.size()]));
608                            hits.setScores(
609                                    subsetScores.toArray(new Float[subsetScores.size()]));
610                    }
611    
612                    return hits;
613            }
614    
615            private static Log _log = LogFactoryUtil.getLog(
616                    LuceneIndexSearcherImpl.class);
617    
618            private class HitDocs {
619    
620                    public HitDocs(BrowseHit[] browseHits) {
621                            _browseHits = browseHits;
622                    }
623    
624                    public HitDocs(TopFieldDocs topFieldDocs) {
625                            _topFieldDocs = topFieldDocs;
626                    }
627    
628                    public int getDocId(int i) {
629                            if (_topFieldDocs != null) {
630                                    ScoreDoc scoreDoc = _topFieldDocs.scoreDocs[i];
631    
632                                    return scoreDoc.doc;
633                            }
634                            else if (_browseHits != null) {
635                                    return _browseHits[i].getDocid();
636                            }
637    
638                            throw new IllegalStateException();
639                    }
640    
641                    public float getScore(int i) {
642                            if (_topFieldDocs != null) {
643                                    ScoreDoc scoreDoc = _topFieldDocs.scoreDocs[i];
644    
645                                    return scoreDoc.score;
646                            }
647                            else if (_browseHits != null) {
648                                    return _browseHits[i].getScore();
649                            }
650    
651                            throw new IllegalStateException();
652                    }
653    
654                    public int getTotalHits() {
655                            if (_topFieldDocs != null) {
656                                    return _topFieldDocs.totalHits;
657                            }
658                            else if (_browseHits != null) {
659                                    return _browseHits.length;
660                            }
661    
662                            throw new IllegalStateException();
663                    }
664    
665                    private BrowseHit[] _browseHits;
666                    private TopFieldDocs _topFieldDocs;
667    
668            }
669    
670    }