001    /**
002     * Copyright (c) 2000-2011 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import com.browseengine.bobo.api.BoboBrowser;
018    import com.browseengine.bobo.api.BoboIndexReader;
019    import com.browseengine.bobo.api.Browsable;
020    import com.browseengine.bobo.api.BrowseHit;
021    import com.browseengine.bobo.api.BrowseRequest;
022    import com.browseengine.bobo.api.BrowseResult;
023    import com.browseengine.bobo.api.FacetAccessible;
024    import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
025    import com.browseengine.bobo.api.FacetSpec;
026    import com.browseengine.bobo.facets.FacetHandler.TermCountSize;
027    import com.browseengine.bobo.facets.FacetHandler;
028    import com.browseengine.bobo.facets.impl.MultiValueFacetHandler;
029    import com.browseengine.bobo.facets.impl.RangeFacetHandler;
030    import com.browseengine.bobo.facets.impl.SimpleFacetHandler;
031    
032    import com.liferay.portal.kernel.dao.orm.QueryUtil;
033    import com.liferay.portal.kernel.json.JSONArray;
034    import com.liferay.portal.kernel.json.JSONObject;
035    import com.liferay.portal.kernel.log.Log;
036    import com.liferay.portal.kernel.log.LogFactoryUtil;
037    import com.liferay.portal.kernel.search.Document;
038    import com.liferay.portal.kernel.search.DocumentImpl;
039    import com.liferay.portal.kernel.search.Field;
040    import com.liferay.portal.kernel.search.Hits;
041    import com.liferay.portal.kernel.search.HitsImpl;
042    import com.liferay.portal.kernel.search.IndexSearcher;
043    import com.liferay.portal.kernel.search.ParseException;
044    import com.liferay.portal.kernel.search.Query;
045    import com.liferay.portal.kernel.search.QueryConfig;
046    import com.liferay.portal.kernel.search.QueryTranslatorUtil;
047    import com.liferay.portal.kernel.search.SearchContext;
048    import com.liferay.portal.kernel.search.SearchException;
049    import com.liferay.portal.kernel.search.Sort;
050    import com.liferay.portal.kernel.search.facet.Facet;
051    import com.liferay.portal.kernel.search.facet.MultiValueFacet;
052    import com.liferay.portal.kernel.search.facet.RangeFacet;
053    import com.liferay.portal.kernel.search.facet.SimpleFacet;
054    import com.liferay.portal.kernel.search.facet.collector.FacetCollector;
055    import com.liferay.portal.kernel.search.facet.config.FacetConfiguration;
056    import com.liferay.portal.kernel.util.ArrayUtil;
057    import com.liferay.portal.kernel.util.StringPool;
058    import com.liferay.portal.kernel.util.StringUtil;
059    import com.liferay.portal.kernel.util.Time;
060    import com.liferay.portal.kernel.util.Validator;
061    import com.liferay.portal.search.BoboFacetCollector;
062    import com.liferay.portal.util.PropsValues;
063    
064    import java.io.IOException;
065    
066    import java.util.ArrayList;
067    import java.util.List;
068    import java.util.Locale;
069    import java.util.Map;
070    
071    import org.apache.lucene.document.NumericField;
072    import org.apache.lucene.index.IndexReader;
073    import org.apache.lucene.search.BooleanQuery;
074    import org.apache.lucene.search.Explanation;
075    import org.apache.lucene.search.ScoreDoc;
076    import org.apache.lucene.search.SortField;
077    import org.apache.lucene.search.TopFieldDocs;
078    
079    /**
080     * @author Bruno Farache
081     */
082    public class LuceneIndexSearcherImpl implements IndexSearcher {
083    
084            public Hits search(
085                            String searchEngineId, long companyId, Query query, Sort[] sorts,
086                            int start, int end)
087                    throws SearchException {
088    
089                    if (_log.isDebugEnabled()) {
090                            _log.debug("Query " + query);
091                    }
092    
093                    Hits hits = null;
094    
095                    org.apache.lucene.search.IndexSearcher indexSearcher = null;
096                    org.apache.lucene.search.Sort luceneSort = null;
097    
098                    try {
099                            indexSearcher = LuceneHelperUtil.getSearcher(companyId, true);
100    
101                            if (sorts != null) {
102                                    SortField[] sortFields = new SortField[sorts.length];
103    
104                                    for (int i = 0; i < sorts.length; i++) {
105                                            Sort sort = sorts[i];
106    
107                                            sortFields[i] = new SortField(
108                                                    sort.getFieldName(), sort.getType(), sort.isReverse());
109                                    }
110    
111                                    luceneSort = new org.apache.lucene.search.Sort(sortFields);
112                            }
113                            else {
114                                    luceneSort = new org.apache.lucene.search.Sort();
115                            }
116    
117                            long startTime = System.currentTimeMillis();
118    
119                            TopFieldDocs topFieldDocs = indexSearcher.search(
120                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
121                                            query),
122                                    null, PropsValues.INDEX_SEARCH_LIMIT, luceneSort);
123    
124                            long endTime = System.currentTimeMillis();
125    
126                            float searchTime = (float)(endTime - startTime) / Time.SECOND;
127    
128                            hits = toHits(
129                                    indexSearcher, new HitDocs(topFieldDocs), query, startTime,
130                                    searchTime, start, end);
131                    }
132                    catch (BooleanQuery.TooManyClauses tmc) {
133                            int maxClauseCount = BooleanQuery.getMaxClauseCount();
134    
135                            BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
136    
137                            try {
138                                    long startTime = System.currentTimeMillis();
139    
140                                    TopFieldDocs topFieldDocs = indexSearcher.search(
141                                            (org.apache.lucene.search.Query)
142                                                    QueryTranslatorUtil.translate(query),
143                                            null, PropsValues.INDEX_SEARCH_LIMIT, luceneSort);
144    
145                                    long endTime = System.currentTimeMillis();
146    
147                                    float searchTime = (float)(endTime - startTime) / Time.SECOND;
148    
149                                    hits = toHits(
150                                            indexSearcher, new HitDocs(topFieldDocs), query, startTime,
151                                            searchTime, start, end);
152                            }
153                            catch (Exception e) {
154                                    throw new SearchException(e);
155                            }
156                            finally {
157                                    BooleanQuery.setMaxClauseCount(maxClauseCount);
158                            }
159                    }
160                    catch (ParseException pe) {
161                            _log.error("Query " + query, pe);
162    
163                            return new HitsImpl();
164                    }
165                    catch (Exception e) {
166                            throw new SearchException(e);
167                    }
168                    finally {
169                            if (indexSearcher != null) {
170                                    try {
171                                            indexSearcher.close();
172                                    }
173                                    catch (IOException ioe) {
174                                            _log.error(ioe, ioe);
175                                    }
176                            }
177                    }
178    
179                    if (_log.isDebugEnabled()) {
180                            _log.debug(
181                                    "Search found " + hits.getLength() + " results in " +
182                                            hits.getSearchTime() + "ms");
183                    }
184    
185                    return hits;
186            }
187    
188            public Hits search(SearchContext searchContext, Query query)
189                    throws SearchException {
190    
191                    if (_log.isDebugEnabled()) {
192                            _log.debug("Query " + query);
193                    }
194    
195                    Hits hits = null;
196    
197                    org.apache.lucene.search.IndexSearcher indexSearcher = null;
198                    Map<String, Facet> facets = null;
199                    BrowseRequest browseRequest = null;
200                    Browsable browsable = null;
201    
202                    try {
203                            indexSearcher = LuceneHelperUtil.getSearcher(
204                                    searchContext.getCompanyId(), true);
205    
206                            List<FacetHandler<?>> facetHandlers =
207                                    new ArrayList<FacetHandler<?>>();
208    
209                            facets = searchContext.getFacets();
210    
211                            for (Facet facet : facets.values()) {
212                                    if (facet.isStatic()) {
213                                            continue;
214                                    }
215    
216                                    FacetConfiguration facetConfiguration =
217                                            facet.getFacetConfiguration();
218    
219                                    if (facet instanceof MultiValueFacet) {
220                                            MultiValueFacetHandler multiValueFacetHandler =
221                                                    new MultiValueFacetHandler(
222                                                            facetConfiguration.getFieldName(),
223                                                            facetConfiguration.getFieldName());
224    
225                                            JSONObject dataJSONObject = facetConfiguration.getData();
226    
227                                            if (dataJSONObject.has("maxTerms")) {
228                                                    multiValueFacetHandler.setMaxItems(
229                                                            dataJSONObject.getInt("maxTerms"));
230                                            }
231    
232                                            facetHandlers.add(multiValueFacetHandler);
233                                    }
234                                    else if (facet instanceof RangeFacet) {
235                                            List<String> ranges = new ArrayList<String>();
236    
237                                            JSONObject dataJSONObject = facetConfiguration.getData();
238    
239                                            JSONArray rangesJSONArray = dataJSONObject.getJSONArray(
240                                                    "ranges");
241    
242                                            if (rangesJSONArray != null) {
243                                                    for (int i = 0; i < rangesJSONArray.length(); i++) {
244                                                            JSONObject rangeJSONObject =
245                                                                    rangesJSONArray.getJSONObject(i);
246    
247                                                            ranges.add(rangeJSONObject.getString("range"));
248                                                    }
249                                            }
250    
251                                            RangeFacetHandler rangeFacetHandler =
252                                                    new RangeFacetHandler(
253                                                            facetConfiguration.getFieldName(),
254                                                            facetConfiguration.getFieldName(), ranges);
255    
256                                            rangeFacetHandler.setTermCountSize(TermCountSize.large);
257    
258                                            facetHandlers.add(rangeFacetHandler);
259                                    }
260                                    else if (facet instanceof SimpleFacet) {
261                                            SimpleFacetHandler simpleFacetHandler =
262                                                    new SimpleFacetHandler(
263                                                            facetConfiguration.getFieldName(),
264                                                            facetConfiguration.getFieldName());
265    
266                                            facetHandlers.add(simpleFacetHandler);
267                                    }
268                            }
269    
270                            BoboIndexReader boboIndexReader = BoboIndexReader.getInstance(
271                                    indexSearcher.getIndexReader(), facetHandlers);
272    
273                            SortField[] sortFields = new SortField[0];
274    
275                            Sort[] sorts = searchContext.getSorts();
276    
277                            if (sorts != null) {
278                                    sortFields = new SortField[sorts.length];
279    
280                                    for (int i = 0; i < sorts.length; i++) {
281                                            Sort sort = sorts[i];
282    
283                                            sortFields[i] = new SortField(
284                                                    sort.getFieldName(), sort.getType(), sort.isReverse());
285                                    }
286                            }
287    
288                            browseRequest = new BrowseRequest();
289    
290                            for (Facet facet : facets.values()) {
291                                    if (facet.isStatic()) {
292                                            continue;
293                                    }
294    
295                                    FacetConfiguration facetConfiguration =
296                                            facet.getFacetConfiguration();
297    
298                                    FacetSpec facetSpec = new FacetSpec();
299    
300                                    facetSpec.setOrderBy(
301                                            FacetSortSpec.valueOf(facetConfiguration.getOrder()));
302    
303                                    browseRequest.setFacetSpec(facet.getFieldName(), facetSpec);
304                            }
305    
306                            browseRequest.setCount(PropsValues.INDEX_SEARCH_LIMIT);
307                            browseRequest.setOffset(0);
308                            browseRequest.setQuery(
309                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
310                                            query));
311                            browseRequest.setSort(sortFields);
312    
313                            browsable = new BoboBrowser(boboIndexReader);
314    
315                            long startTime = System.currentTimeMillis();
316    
317                            BrowseResult browseResult = browsable.browse(browseRequest);
318    
319                            BrowseHit[] browseHits = browseResult.getHits();
320    
321                            long endTime = System.currentTimeMillis();
322    
323                            float searchTime = (float)(endTime - startTime) / Time.SECOND;
324    
325                            hits = toHits(
326                                    indexSearcher, new HitDocs(browseHits), query, startTime,
327                                    searchTime, searchContext.getStart(), searchContext.getEnd());
328    
329                            Map<String, FacetAccessible> facetMap = browseResult.getFacetMap();
330    
331                            for (Map.Entry<String, FacetAccessible> entry :
332                                            facetMap.entrySet()) {
333    
334                                    Facet facet = facets.get(entry.getKey());
335    
336                                    FacetAccessible facetAccessible = entry.getValue();
337    
338                                    FacetCollector facetCollector = new BoboFacetCollector(
339                                            entry.getKey(), facetAccessible);
340    
341                                    facet.setFacetCollector(facetCollector);
342                            }
343                    }
344                    catch (BooleanQuery.TooManyClauses tmc) {
345                            int maxClauseCount = BooleanQuery.getMaxClauseCount();
346    
347                            BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
348    
349                            try {
350                                    long startTime = System.currentTimeMillis();
351    
352                                    BrowseResult result = browsable.browse(browseRequest);
353    
354                                    BrowseHit[] browseHits = result.getHits();
355    
356                                    long endTime = System.currentTimeMillis();
357    
358                                    float searchTime = (float)(endTime - startTime) / Time.SECOND;
359    
360                                    hits = toHits(
361                                            indexSearcher, new HitDocs(browseHits), query, startTime,
362                                            searchTime, searchContext.getStart(),
363                                            searchContext.getEnd());
364    
365                                    Map<String, FacetAccessible> facetMap = result.getFacetMap();
366    
367                                    for (Map.Entry<String, FacetAccessible> entry :
368                                                    facetMap.entrySet()) {
369    
370                                            Facet facet = facets.get(entry.getKey());
371    
372                                            FacetAccessible facetAccessible = entry.getValue();
373    
374                                            FacetCollector facetCollector = new BoboFacetCollector(
375                                                    entry.getKey(), facetAccessible);
376    
377                                            facet.setFacetCollector(facetCollector);
378                                    }
379                            }
380                            catch (Exception e) {
381                                    throw new SearchException(e);
382                            }
383                            finally {
384                                    BooleanQuery.setMaxClauseCount(maxClauseCount);
385                            }
386                    }
387                    catch (ParseException pe) {
388                            _log.error("Query " + query, pe);
389    
390                            return new HitsImpl();
391                    }
392                    catch (Exception e) {
393                            throw new SearchException(e);
394                    }
395                    finally {
396                            if (browsable != null) {
397                                    try {
398                                            browsable.close();
399                                    }
400                                    catch (IOException ioe) {
401                                            _log.error(ioe, ioe);
402                                    }
403                            }
404    
405                            if (indexSearcher != null) {
406                                    try {
407                                            indexSearcher.close();
408                                    }
409                                    catch (IOException ioe) {
410                                            _log.error(ioe, ioe);
411                                    }
412                            }
413                    }
414    
415                    if (_log.isDebugEnabled()) {
416                            _log.debug(
417                                    "Search found " + hits.getLength() + " results in " +
418                                            hits.getSearchTime() + "ms");
419                    }
420    
421                    return hits;
422            }
423    
424            protected DocumentImpl getDocument(
425                    org.apache.lucene.document.Document oldDocument) {
426    
427                    DocumentImpl newDocument = new DocumentImpl();
428    
429                    List<org.apache.lucene.document.Fieldable> oldFieldables =
430                            oldDocument.getFields();
431    
432                    for (org.apache.lucene.document.Fieldable oldFieldable :
433                                    oldFieldables) {
434    
435                            Field newField = null;
436    
437                            String[] values = oldDocument.getValues(oldFieldable.name());
438    
439                            if ((values != null) && (values.length > 1)) {
440                                    newField = new Field(oldFieldable.name(), values);
441                            }
442                            else {
443                                    newField = new Field(
444                                            oldFieldable.name(), oldFieldable.stringValue());
445                            }
446    
447                            newField.setNumeric(oldFieldable instanceof NumericField);
448                            newField.setTokenized(oldFieldable.isTokenized());
449    
450                            newDocument.add(newField);
451                    }
452    
453                    return newDocument;
454            }
455    
456            protected String[] getQueryTerms(Query query) {
457                    String[] queryTerms = new String[0];
458    
459                    try {
460                            queryTerms = LuceneHelperUtil.getQueryTerms(
461                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
462                                            query));
463                    }
464                    catch (ParseException pe) {
465                            _log.error("Query " + query, pe);
466                    }
467    
468                    return queryTerms;
469            }
470    
471            protected String getSnippet(
472                            org.apache.lucene.document.Document doc, Query query, String field,
473                            Locale locale)
474                    throws IOException {
475    
476                    String localizedName = DocumentImpl.getLocalizedName(locale, field);
477    
478                    String[] values = doc.getValues(localizedName);
479    
480                    if ((values == null) || (values.length == 0)) {
481                            values = doc.getValues(field);
482                    }
483    
484                    String snippet = null;
485    
486                    if (Validator.isNull(values)) {
487                            return snippet;
488                    }
489    
490                    String s = StringUtil.merge(values);
491    
492                    try {
493                            snippet = LuceneHelperUtil.getSnippet(
494                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
495                                            query),
496                                    field, s);
497                    }
498                    catch (ParseException pe) {
499                            _log.error("Query " + query, pe);
500                    }
501    
502                    return snippet;
503            }
504    
505            protected Hits toHits(
506                            org.apache.lucene.search.IndexSearcher indexSearcher,
507                            HitDocs hitDocs, Query query, long startTime,
508                            float searchTime, int start, int end)
509                    throws IOException, ParseException {
510    
511                    int length = hitDocs.getTotalHits();
512    
513                    if ((start == QueryUtil.ALL_POS) && (end == QueryUtil.ALL_POS)) {
514                            start = 0;
515                            end = length;
516                    }
517    
518                    String[] queryTerms = getQueryTerms(query);
519    
520                    IndexReader indexReader = indexSearcher.getIndexReader();
521    
522                    List<String> indexedFieldNames = new ArrayList<String> (
523                            indexReader.getFieldNames(IndexReader.FieldOption.INDEXED));
524    
525                    org.apache.lucene.search.Query luceneQuery =
526                            (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
527                                    query);
528    
529                    int scoredFieldNamesCount = LuceneHelperUtil.countScoredFieldNames(
530                            luceneQuery, ArrayUtil.toStringArray(indexedFieldNames.toArray()));
531    
532                    Hits hits = new HitsImpl();
533    
534                    if ((start > -1) && (start <= end)) {
535                            if (end > length) {
536                                    end = length;
537                            }
538    
539                            if (start > end) {
540                                    start = end;
541                            }
542    
543                            int subsetTotal = end - start;
544    
545                            if (subsetTotal > PropsValues.INDEX_SEARCH_LIMIT) {
546                                    subsetTotal = PropsValues.INDEX_SEARCH_LIMIT;
547                            }
548    
549                            List<Document> subsetDocs = new ArrayList<Document>(subsetTotal);
550                            List<String> subsetSnippets = new ArrayList<String>(subsetTotal);
551                            List<Float> subsetScores = new ArrayList<Float>(subsetTotal);
552    
553                            QueryConfig queryConfig = query.getQueryConfig();
554    
555                            for (int i = start; i < end; i++) {
556                                    if (i >= PropsValues.INDEX_SEARCH_LIMIT) {
557                                            break;
558                                    }
559    
560                                    int docId = hitDocs.getDocId(i);
561    
562                                    org.apache.lucene.document.Document document =
563                                            indexSearcher.doc(docId);
564    
565                                    Document subsetDocument = getDocument(document);
566    
567                                    String subsetSnippet = StringPool.BLANK;
568    
569                                    if (queryConfig.isHighlightEnabled()) {
570                                            subsetSnippet = getSnippet(
571                                                    document, query, Field.CONTENT,
572                                                    queryConfig.getLocale());
573                                    }
574    
575                                    subsetDocument.addText(Field.SNIPPET, subsetSnippet);
576    
577                                    subsetSnippets.add(subsetSnippet);
578    
579                                    subsetDocs.add(subsetDocument);
580    
581                                    Float subsetScore = hitDocs.getScore(i);
582    
583                                    if (scoredFieldNamesCount > 0) {
584                                            subsetScore = subsetScore / scoredFieldNamesCount;
585                                    }
586    
587                                    subsetScores.add(subsetScore);
588    
589                                    if (_log.isDebugEnabled()) {
590                                            try {
591                                                    Explanation explanation = indexSearcher.explain(
592                                                            luceneQuery, docId);
593    
594                                                    _log.debug(explanation.toString());
595                                            }
596                                            catch (Exception e) {
597                                            }
598                                    }
599                            }
600    
601                            hits.setStart(startTime);
602                            hits.setSearchTime(searchTime);
603                            hits.setQuery(query);
604                            hits.setQueryTerms(queryTerms);
605                            hits.setDocs(subsetDocs.toArray(new Document[subsetDocs.size()]));
606                            hits.setLength(length);
607                            hits.setSnippets(
608                                    subsetSnippets.toArray(new String[subsetSnippets.size()]));
609                            hits.setScores(
610                                    subsetScores.toArray(new Float[subsetScores.size()]));
611                    }
612    
613                    return hits;
614            }
615    
616            private static Log _log = LogFactoryUtil.getLog(
617                    LuceneIndexSearcherImpl.class);
618    
619            private class HitDocs {
620    
621                    public HitDocs(BrowseHit[] browseHits) {
622                            _browseHits = browseHits;
623                    }
624    
625                    public HitDocs(TopFieldDocs topFieldDocs) {
626                            _topFieldDocs = topFieldDocs;
627                    }
628    
629                    public int getDocId(int i) {
630                            if (_topFieldDocs != null) {
631                                    ScoreDoc scoreDoc = _topFieldDocs.scoreDocs[i];
632    
633                                    return scoreDoc.doc;
634                            }
635                            else if (_browseHits != null) {
636                                    return _browseHits[i].getDocid();
637                            }
638    
639                            throw new IllegalStateException();
640                    }
641    
642                    public float getScore(int i) {
643                            if (_topFieldDocs != null) {
644                                    ScoreDoc scoreDoc = _topFieldDocs.scoreDocs[i];
645    
646                                    return scoreDoc.score;
647                            }
648                            else if (_browseHits != null) {
649                                    return _browseHits[i].getScore();
650                            }
651    
652                            throw new IllegalStateException();
653                    }
654    
655                    public int getTotalHits() {
656                            if (_topFieldDocs != null) {
657                                    return _topFieldDocs.totalHits;
658                            }
659                            else if (_browseHits != null) {
660                                    return _browseHits.length;
661                            }
662    
663                            throw new IllegalStateException();
664                    }
665    
666                    private BrowseHit[] _browseHits;
667                    private TopFieldDocs _topFieldDocs;
668    
669            }
670    
671    }