001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.browseengine.bobo.api.BoboBrowser;
018 import com.browseengine.bobo.api.BoboIndexReader;
019 import com.browseengine.bobo.api.BoboSubBrowser;
020 import com.browseengine.bobo.api.Browsable;
021 import com.browseengine.bobo.api.BrowseHit;
022 import com.browseengine.bobo.api.BrowseRequest;
023 import com.browseengine.bobo.api.BrowseResult;
024 import com.browseengine.bobo.api.FacetAccessible;
025 import com.browseengine.bobo.api.FacetSpec;
026 import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
027 import com.browseengine.bobo.facets.FacetHandler;
028 import com.browseengine.bobo.facets.FacetHandler.TermCountSize;
029 import com.browseengine.bobo.facets.impl.MultiValueFacetHandler;
030 import com.browseengine.bobo.facets.impl.RangeFacetHandler;
031 import com.browseengine.bobo.facets.impl.SimpleFacetHandler;
032
033 import com.liferay.portal.kernel.dao.orm.QueryUtil;
034 import com.liferay.portal.kernel.dao.search.SearchPaginationUtil;
035 import com.liferay.portal.kernel.json.JSONArray;
036 import com.liferay.portal.kernel.json.JSONObject;
037 import com.liferay.portal.kernel.log.Log;
038 import com.liferay.portal.kernel.log.LogFactoryUtil;
039 import com.liferay.portal.kernel.search.BaseIndexSearcher;
040 import com.liferay.portal.kernel.search.Document;
041 import com.liferay.portal.kernel.search.DocumentImpl;
042 import com.liferay.portal.kernel.search.Field;
043 import com.liferay.portal.kernel.search.Hits;
044 import com.liferay.portal.kernel.search.HitsImpl;
045 import com.liferay.portal.kernel.search.ParseException;
046 import com.liferay.portal.kernel.search.Query;
047 import com.liferay.portal.kernel.search.QueryConfig;
048 import com.liferay.portal.kernel.search.QueryTranslatorUtil;
049 import com.liferay.portal.kernel.search.SearchContext;
050 import com.liferay.portal.kernel.search.SearchException;
051 import com.liferay.portal.kernel.search.Sort;
052 import com.liferay.portal.kernel.search.facet.Facet;
053 import com.liferay.portal.kernel.search.facet.MultiValueFacet;
054 import com.liferay.portal.kernel.search.facet.RangeFacet;
055 import com.liferay.portal.kernel.search.facet.SimpleFacet;
056 import com.liferay.portal.kernel.search.facet.collector.FacetCollector;
057 import com.liferay.portal.kernel.search.facet.config.FacetConfiguration;
058 import com.liferay.portal.kernel.util.ArrayUtil;
059 import com.liferay.portal.kernel.util.ReflectionUtil;
060 import com.liferay.portal.kernel.util.StringPool;
061 import com.liferay.portal.kernel.util.StringUtil;
062 import com.liferay.portal.kernel.util.Time;
063 import com.liferay.portal.kernel.util.Validator;
064 import com.liferay.portal.search.BoboFacetCollector;
065 import com.liferay.portal.util.PropsValues;
066
067 import java.io.IOException;
068
069 import java.util.ArrayList;
070 import java.util.HashSet;
071 import java.util.List;
072 import java.util.Locale;
073 import java.util.Map;
074 import java.util.Set;
075
076 import org.apache.lucene.document.NumericField;
077 import org.apache.lucene.index.IndexReader;
078 import org.apache.lucene.search.BooleanQuery;
079 import org.apache.lucene.search.Explanation;
080 import org.apache.lucene.search.IndexSearcher;
081 import org.apache.lucene.search.ScoreDoc;
082 import org.apache.lucene.search.SortField;
083 import org.apache.lucene.search.TopFieldDocs;
084 import org.apache.lucene.search.highlight.Formatter;
085 import org.apache.lucene.search.highlight.TokenGroup;
086
087
090 public class LuceneIndexSearcher extends BaseIndexSearcher {
091
092 @Override
093 public Hits search(SearchContext searchContext, Query query)
094 throws SearchException {
095
096 if (_log.isDebugEnabled()) {
097 _log.debug("Query " + query);
098 }
099
100 Hits hits = null;
101
102 IndexSearcher indexSearcher = null;
103 Map<String, Facet> facets = null;
104 BoboBrowser boboBrowser = null;
105 BrowseRequest browseRequest = null;
106
107 try {
108 indexSearcher = LuceneHelperUtil.getSearcher(
109 searchContext.getCompanyId(), true);
110
111 List<FacetHandler<?>> facetHandlers =
112 new ArrayList<FacetHandler<?>>();
113
114 facets = searchContext.getFacets();
115
116 for (Facet facet : facets.values()) {
117 if (facet.isStatic()) {
118 continue;
119 }
120
121 FacetConfiguration facetConfiguration =
122 facet.getFacetConfiguration();
123
124 if (facet instanceof MultiValueFacet) {
125 MultiValueFacetHandler multiValueFacetHandler =
126 new MultiValueFacetHandler(
127 facetConfiguration.getFieldName(),
128 facetConfiguration.getFieldName());
129
130 JSONObject dataJSONObject = facetConfiguration.getData();
131
132 if (dataJSONObject.has("maxTerms")) {
133 multiValueFacetHandler.setMaxItems(
134 dataJSONObject.getInt("maxTerms"));
135 }
136
137 facetHandlers.add(multiValueFacetHandler);
138 }
139 else if (facet instanceof RangeFacet) {
140 List<String> ranges = new ArrayList<String>();
141
142 JSONObject dataJSONObject = facetConfiguration.getData();
143
144 JSONArray rangesJSONArray = dataJSONObject.getJSONArray(
145 "ranges");
146
147 if (rangesJSONArray != null) {
148 for (int i = 0; i < rangesJSONArray.length(); i++) {
149 JSONObject rangeJSONObject =
150 rangesJSONArray.getJSONObject(i);
151
152 ranges.add(rangeJSONObject.getString("range"));
153 }
154 }
155
156 RangeFacetHandler rangeFacetHandler = new RangeFacetHandler(
157 facetConfiguration.getFieldName(),
158 facetConfiguration.getFieldName(), ranges);
159
160 rangeFacetHandler.setTermCountSize(TermCountSize.large);
161
162 facetHandlers.add(rangeFacetHandler);
163 }
164 else if (facet instanceof SimpleFacet) {
165 SimpleFacetHandler simpleFacetHandler =
166 new SimpleFacetHandler(
167 facetConfiguration.getFieldName(),
168 facetConfiguration.getFieldName());
169
170 facetHandlers.add(simpleFacetHandler);
171 }
172 }
173
174 BoboIndexReader boboIndexReader = BoboIndexReader.getInstance(
175 indexSearcher.getIndexReader(), facetHandlers);
176
177 SortField[] sortFields = new SortField[0];
178
179 Sort[] sorts = searchContext.getSorts();
180
181 if (sorts != null) {
182 sortFields = new SortField[sorts.length];
183
184 for (int i = 0; i < sorts.length; i++) {
185 Sort sort = sorts[i];
186
187 sortFields[i] = new SortField(
188 sort.getFieldName(), sort.getType(), sort.isReverse());
189 }
190 }
191
192 browseRequest = new BrowseRequest();
193
194 for (Facet facet : facets.values()) {
195 if (facet.isStatic()) {
196 continue;
197 }
198
199 FacetConfiguration facetConfiguration =
200 facet.getFacetConfiguration();
201
202 FacetSpec facetSpec = new FacetSpec();
203
204 facetSpec.setOrderBy(
205 FacetSortSpec.valueOf(facetConfiguration.getOrder()));
206
207 browseRequest.setFacetSpec(facet.getFieldName(), facetSpec);
208 }
209
210 browseRequest.setCount(PropsValues.INDEX_SEARCH_LIMIT);
211 browseRequest.setOffset(0);
212 browseRequest.setQuery(
213 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
214 query));
215 browseRequest.setSort(sortFields);
216
217 boboBrowser = new BoboBrowser(boboIndexReader);
218
219 long startTime = System.currentTimeMillis();
220
221 BrowseResult browseResult = boboBrowser.browse(browseRequest);
222
223 BrowseHit[] browseHits = browseResult.getHits();
224
225 long endTime = System.currentTimeMillis();
226
227 float searchTime = (float)(endTime - startTime) / Time.SECOND;
228
229 hits = toHits(
230 indexSearcher, new HitDocs(browseHits), query, startTime,
231 searchTime, searchContext.getStart(), searchContext.getEnd());
232
233 Map<String, FacetAccessible> facetMap = browseResult.getFacetMap();
234
235 for (Map.Entry<String, FacetAccessible> entry :
236 facetMap.entrySet()) {
237
238 Facet facet = facets.get(entry.getKey());
239
240 FacetAccessible facetAccessible = entry.getValue();
241
242 FacetCollector facetCollector = new BoboFacetCollector(
243 entry.getKey(), facetAccessible);
244
245 facet.setFacetCollector(facetCollector);
246 }
247 }
248 catch (BooleanQuery.TooManyClauses tmc) {
249 int maxClauseCount = BooleanQuery.getMaxClauseCount();
250
251 BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
252
253 try {
254 long startTime = System.currentTimeMillis();
255
256 BrowseResult browseResult = boboBrowser.browse(browseRequest);
257
258 BrowseHit[] browseHits = browseResult.getHits();
259
260 long endTime = System.currentTimeMillis();
261
262 float searchTime = (float)(endTime - startTime) / Time.SECOND;
263
264 hits = toHits(
265 indexSearcher, new HitDocs(browseHits), query, startTime,
266 searchTime, searchContext.getStart(),
267 searchContext.getEnd());
268
269 Map<String, FacetAccessible> facetMap =
270 browseResult.getFacetMap();
271
272 for (Map.Entry<String, FacetAccessible> entry :
273 facetMap.entrySet()) {
274
275 Facet facet = facets.get(entry.getKey());
276
277 FacetAccessible facetAccessible = entry.getValue();
278
279 FacetCollector facetCollector = new BoboFacetCollector(
280 entry.getKey(), facetAccessible);
281
282 facet.setFacetCollector(facetCollector);
283 }
284 }
285 catch (Exception e) {
286 throw new SearchException(e);
287 }
288 finally {
289 BooleanQuery.setMaxClauseCount(maxClauseCount);
290 }
291 }
292 catch (ParseException pe) {
293 _log.error("Query " + query, pe);
294
295 return new HitsImpl();
296 }
297 catch (Exception e) {
298 throw new SearchException(e);
299 }
300 finally {
301 cleanUp(boboBrowser);
302
303 LuceneHelperUtil.cleanUp(indexSearcher);
304 }
305
306 if (_log.isDebugEnabled()) {
307 _log.debug(
308 "Search found " + hits.getLength() + " results in " +
309 hits.getSearchTime() + "ms");
310 }
311
312 return hits;
313 }
314
315 @Override
316 public Hits search(
317 String searchEngineId, long companyId, Query query, Sort[] sorts,
318 int start, int end)
319 throws SearchException {
320
321 if (_log.isDebugEnabled()) {
322 _log.debug("Query " + query);
323 }
324
325 Hits hits = null;
326
327 IndexSearcher indexSearcher = null;
328 org.apache.lucene.search.Sort luceneSort = null;
329
330 try {
331 indexSearcher = LuceneHelperUtil.getSearcher(companyId, true);
332
333 if (sorts != null) {
334 SortField[] sortFields = new SortField[sorts.length];
335
336 for (int i = 0; i < sorts.length; i++) {
337 Sort sort = sorts[i];
338
339 sortFields[i] = new SortField(
340 sort.getFieldName(), sort.getType(), sort.isReverse());
341 }
342
343 luceneSort = new org.apache.lucene.search.Sort(sortFields);
344 }
345 else {
346 luceneSort = new org.apache.lucene.search.Sort();
347 }
348
349 long startTime = System.currentTimeMillis();
350
351 TopFieldDocs topFieldDocs = indexSearcher.search(
352 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
353 query),
354 null, PropsValues.INDEX_SEARCH_LIMIT, luceneSort);
355
356 long endTime = System.currentTimeMillis();
357
358 float searchTime = (float)(endTime - startTime) / Time.SECOND;
359
360 hits = toHits(
361 indexSearcher, new HitDocs(topFieldDocs), query, startTime,
362 searchTime, start, end);
363 }
364 catch (BooleanQuery.TooManyClauses tmc) {
365 int maxClauseCount = BooleanQuery.getMaxClauseCount();
366
367 BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
368
369 try {
370 long startTime = System.currentTimeMillis();
371
372 TopFieldDocs topFieldDocs = indexSearcher.search(
373 (org.apache.lucene.search.Query)
374 QueryTranslatorUtil.translate(query),
375 null, PropsValues.INDEX_SEARCH_LIMIT, luceneSort);
376
377 long endTime = System.currentTimeMillis();
378
379 float searchTime = (float)(endTime - startTime) / Time.SECOND;
380
381 hits = toHits(
382 indexSearcher, new HitDocs(topFieldDocs), query, startTime,
383 searchTime, start, end);
384 }
385 catch (Exception e) {
386 throw new SearchException(e);
387 }
388 finally {
389 BooleanQuery.setMaxClauseCount(maxClauseCount);
390 }
391 }
392 catch (ParseException pe) {
393 _log.error("Query " + query, pe);
394
395 return new HitsImpl();
396 }
397 catch (Exception e) {
398 throw new SearchException(e);
399 }
400 finally {
401 LuceneHelperUtil.cleanUp(indexSearcher);
402 }
403
404 if (_log.isDebugEnabled()) {
405 _log.debug(
406 "Search found " + hits.getLength() + " results in " +
407 hits.getSearchTime() + "ms");
408 }
409
410 return hits;
411 }
412
413 protected void cleanUp(BoboBrowser boboBrowser) {
414 if (boboBrowser == null) {
415 return;
416 }
417
418 try {
419 boboBrowser.close();
420 }
421 catch (IOException ioe) {
422 _log.error(ioe, ioe);
423 }
424
425 Browsable[] browsables = boboBrowser.getSubBrowsers();
426
427 for (Browsable browsable : browsables) {
428 if (!(browsable instanceof BoboSubBrowser)) {
429 continue;
430 }
431
432 BoboSubBrowser boboSubBrowser = (BoboSubBrowser)browsable;
433
434 BoboIndexReader boboIndexReader = boboSubBrowser.getIndexReader();
435
436 try {
437 ThreadLocal<?> threadLocal =
438 (ThreadLocal<?>)_runtimeFacetDataMapField.get(
439 boboIndexReader);
440
441 threadLocal.remove();
442
443 _runtimeFacetDataMapField.set(boboIndexReader, null);
444 }
445 catch (Exception e) {
446 _log.error(
447 "Unable to clean up BoboIndexReader#_runtimeFacetDataMap",
448 e);
449 }
450 }
451 }
452
453 protected DocumentImpl getDocument(
454 org.apache.lucene.document.Document oldDocument) {
455
456 DocumentImpl newDocument = new DocumentImpl();
457
458 List<org.apache.lucene.document.Fieldable> oldFieldables =
459 oldDocument.getFields();
460
461 for (org.apache.lucene.document.Fieldable oldFieldable :
462 oldFieldables) {
463
464 Field newField = null;
465
466 String[] values = oldDocument.getValues(oldFieldable.name());
467
468 if ((values != null) && (values.length > 1)) {
469 newField = new Field(oldFieldable.name(), values);
470 }
471 else {
472 newField = new Field(
473 oldFieldable.name(), oldFieldable.stringValue());
474 }
475
476 newField.setNumeric(oldFieldable instanceof NumericField);
477 newField.setTokenized(oldFieldable.isTokenized());
478
479 newDocument.add(newField);
480 }
481
482 return newDocument;
483 }
484
485 protected Set<String> getQueryTerms(Query query) {
486 Set<String> queryTerms = new HashSet<String>();
487
488 try {
489 queryTerms = LuceneHelperUtil.getQueryTerms(
490 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
491 query));
492 }
493 catch (ParseException pe) {
494 _log.error("Query " + query, pe);
495 }
496
497 return queryTerms;
498 }
499
500 protected String getSnippet(
501 org.apache.lucene.document.Document doc, Query query, String field,
502 Locale locale, Document hitDoc, Set<String> matchingTerms)
503 throws IOException {
504
505 String snippetField = DocumentImpl.getLocalizedName(locale, field);
506 String snippet = null;
507
508 try {
509 org.apache.lucene.search.Query luceneQuery =
510 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
511 query);
512
513 String[] values = doc.getValues(snippetField);
514
515 TermCollectingFormatter termCollectingFormatter =
516 new TermCollectingFormatter();
517
518 if ((values != null) && (values.length > 0)) {
519 snippet = LuceneHelperUtil.getSnippet(
520 luceneQuery, snippetField, StringUtil.merge(values),
521 termCollectingFormatter);
522 }
523
524 if ((values == null) || (values.length == 0) ||
525 Validator.isNull(snippet)) {
526
527 snippetField = field;
528
529 values = doc.getValues(snippetField);
530
531 if (Validator.isNull(values)) {
532 return StringPool.BLANK;
533 }
534
535 snippet = LuceneHelperUtil.getSnippet(
536 luceneQuery, field, StringUtil.merge(values),
537 termCollectingFormatter);
538 }
539
540 if (Validator.isNull(snippet)) {
541 return StringPool.BLANK;
542 }
543
544 matchingTerms.addAll(termCollectingFormatter.getTerms());
545 }
546 catch (ParseException pe) {
547 _log.error("Query " + query, pe);
548 }
549
550 hitDoc.addText(
551 Field.SNIPPET.concat(StringPool.UNDERLINE).concat(snippetField),
552 snippet);
553
554 return snippet;
555 }
556
557 protected Hits toHits(
558 IndexSearcher indexSearcher, HitDocs hitDocs, Query query,
559 long startTime, float searchTime, int start, int end)
560 throws IOException, ParseException {
561
562 int total = hitDocs.getTotalHits();
563
564 if ((start == QueryUtil.ALL_POS) && (end == QueryUtil.ALL_POS)) {
565 start = 0;
566 end = total;
567 }
568
569 int[] startAndEnd = SearchPaginationUtil.calculateStartAndEnd(
570 start, end, total);
571
572 start = startAndEnd[0];
573 end = startAndEnd[1];
574
575 Set<String> queryTerms = new HashSet<String>();
576
577 IndexReader indexReader = indexSearcher.getIndexReader();
578
579 List<String> indexedFieldNames = new ArrayList<String> (
580 indexReader.getFieldNames(IndexReader.FieldOption.INDEXED));
581
582 org.apache.lucene.search.Query luceneQuery =
583 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
584 query);
585
586 int scoredFieldNamesCount = LuceneHelperUtil.countScoredFieldNames(
587 luceneQuery, ArrayUtil.toStringArray(indexedFieldNames.toArray()));
588
589 Hits hits = new HitsImpl();
590
591 if ((start < 0) || (start > end)) {
592 return hits;
593 }
594
595 int subsetTotal = end - start;
596
597 if (subsetTotal > PropsValues.INDEX_SEARCH_LIMIT) {
598 subsetTotal = PropsValues.INDEX_SEARCH_LIMIT;
599 }
600
601 List<Document> subsetDocs = new ArrayList<Document>(subsetTotal);
602 List<Float> subsetScores = new ArrayList<Float>(subsetTotal);
603
604 QueryConfig queryConfig = query.getQueryConfig();
605
606 for (int i = start; i < start + subsetTotal; i++) {
607 int docId = hitDocs.getDocId(i);
608
609 org.apache.lucene.document.Document document = indexSearcher.doc(
610 docId);
611
612 Document subsetDocument = getDocument(document);
613
614 if (queryConfig.isHighlightEnabled()) {
615 Locale locale = queryConfig.getLocale();
616
617 getSnippet(
618 document, query, Field.CONTENT, locale, subsetDocument,
619 queryTerms);
620 getSnippet(
621 document, query, Field.DESCRIPTION, locale, subsetDocument,
622 queryTerms);
623 getSnippet(
624 document, query, Field.TITLE, locale, subsetDocument,
625 queryTerms);
626 }
627
628 subsetDocs.add(subsetDocument);
629
630 Float subsetScore = hitDocs.getScore(i);
631
632 if (scoredFieldNamesCount > 0) {
633 subsetScore = subsetScore / scoredFieldNamesCount;
634 }
635
636 subsetScores.add(subsetScore);
637
638 if (_log.isDebugEnabled()) {
639 try {
640 Explanation explanation = indexSearcher.explain(
641 luceneQuery, docId);
642
643 _log.debug(explanation.toString());
644 }
645 catch (Exception e) {
646 }
647 }
648 }
649
650 if (!queryConfig.isHighlightEnabled()) {
651 queryTerms = getQueryTerms(query);
652 }
653
654 hits.setDocs(subsetDocs.toArray(new Document[subsetDocs.size()]));
655 hits.setLength(total);
656 hits.setQuery(query);
657 hits.setQueryTerms(queryTerms.toArray(new String[queryTerms.size()]));
658 hits.setScores(subsetScores.toArray(new Float[subsetScores.size()]));
659 hits.setSearchTime(searchTime);
660 hits.setStart(startTime);
661
662 return hits;
663 }
664
665 private static Log _log = LogFactoryUtil.getLog(LuceneIndexSearcher.class);
666
667 private static java.lang.reflect.Field _runtimeFacetDataMapField;
668
669 static {
670 try {
671 _runtimeFacetDataMapField = ReflectionUtil.getDeclaredField(
672 BoboIndexReader.class, "_runtimeFacetDataMap");
673 }
674 catch (Exception e) {
675 throw new ExceptionInInitializerError(e);
676 }
677 }
678
679 private class HitDocs {
680
681 public HitDocs(BrowseHit[] browseHits) {
682 _browseHits = browseHits;
683 }
684
685 public HitDocs(TopFieldDocs topFieldDocs) {
686 _topFieldDocs = topFieldDocs;
687 }
688
689 public int getDocId(int i) {
690 if (_topFieldDocs != null) {
691 ScoreDoc scoreDoc = _topFieldDocs.scoreDocs[i];
692
693 return scoreDoc.doc;
694 }
695 else if (_browseHits != null) {
696 return _browseHits[i].getDocid();
697 }
698
699 throw new IllegalStateException();
700 }
701
702 public float getScore(int i) {
703 if (_topFieldDocs != null) {
704 ScoreDoc scoreDoc = _topFieldDocs.scoreDocs[i];
705
706 return scoreDoc.score;
707 }
708 else if (_browseHits != null) {
709 return _browseHits[i].getScore();
710 }
711
712 throw new IllegalStateException();
713 }
714
715 public int getTotalHits() {
716 if (_topFieldDocs != null) {
717 return _topFieldDocs.totalHits;
718 }
719 else if (_browseHits != null) {
720 return _browseHits.length;
721 }
722
723 throw new IllegalStateException();
724 }
725
726 private BrowseHit[] _browseHits;
727 private TopFieldDocs _topFieldDocs;
728
729 }
730
731 private class TermCollectingFormatter implements Formatter {
732
733 public Set<String> getTerms() {
734 return _terms;
735 }
736
737 @Override
738 public String highlightTerm(
739 String originalText, TokenGroup tokenGroup) {
740
741 if (tokenGroup.getTotalScore() > 0) {
742 _terms.add(originalText);
743 }
744
745 return originalText;
746 }
747
748 private Set<String> _terms = new HashSet<String>();
749
750 }
751
752 }