001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.browseengine.bobo.api.BoboBrowser;
018 import com.browseengine.bobo.api.BoboIndexReader;
019 import com.browseengine.bobo.api.BoboSubBrowser;
020 import com.browseengine.bobo.api.Browsable;
021 import com.browseengine.bobo.api.BrowseHit;
022 import com.browseengine.bobo.api.BrowseRequest;
023 import com.browseengine.bobo.api.BrowseResult;
024 import com.browseengine.bobo.api.FacetAccessible;
025 import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
026 import com.browseengine.bobo.api.FacetSpec;
027 import com.browseengine.bobo.facets.FacetHandler.TermCountSize;
028 import com.browseengine.bobo.facets.FacetHandler;
029 import com.browseengine.bobo.facets.impl.MultiValueFacetHandler;
030 import com.browseengine.bobo.facets.impl.RangeFacetHandler;
031 import com.browseengine.bobo.facets.impl.SimpleFacetHandler;
032
033 import com.liferay.portal.kernel.dao.orm.QueryUtil;
034 import com.liferay.portal.kernel.json.JSONArray;
035 import com.liferay.portal.kernel.json.JSONObject;
036 import com.liferay.portal.kernel.log.Log;
037 import com.liferay.portal.kernel.log.LogFactoryUtil;
038 import com.liferay.portal.kernel.search.Document;
039 import com.liferay.portal.kernel.search.DocumentImpl;
040 import com.liferay.portal.kernel.search.Field;
041 import com.liferay.portal.kernel.search.Hits;
042 import com.liferay.portal.kernel.search.HitsImpl;
043 import com.liferay.portal.kernel.search.IndexSearcher;
044 import com.liferay.portal.kernel.search.ParseException;
045 import com.liferay.portal.kernel.search.Query;
046 import com.liferay.portal.kernel.search.QueryConfig;
047 import com.liferay.portal.kernel.search.QueryTranslatorUtil;
048 import com.liferay.portal.kernel.search.SearchContext;
049 import com.liferay.portal.kernel.search.SearchException;
050 import com.liferay.portal.kernel.search.Sort;
051 import com.liferay.portal.kernel.search.facet.Facet;
052 import com.liferay.portal.kernel.search.facet.MultiValueFacet;
053 import com.liferay.portal.kernel.search.facet.RangeFacet;
054 import com.liferay.portal.kernel.search.facet.SimpleFacet;
055 import com.liferay.portal.kernel.search.facet.collector.FacetCollector;
056 import com.liferay.portal.kernel.search.facet.config.FacetConfiguration;
057 import com.liferay.portal.kernel.util.ArrayUtil;
058 import com.liferay.portal.kernel.util.ReflectionUtil;
059 import com.liferay.portal.kernel.util.StringPool;
060 import com.liferay.portal.kernel.util.StringUtil;
061 import com.liferay.portal.kernel.util.Time;
062 import com.liferay.portal.kernel.util.Validator;
063 import com.liferay.portal.search.BoboFacetCollector;
064 import com.liferay.portal.util.PropsValues;
065
066 import java.io.IOException;
067
068 import java.util.ArrayList;
069 import java.util.List;
070 import java.util.Locale;
071 import java.util.Map;
072
073 import org.apache.lucene.document.NumericField;
074 import org.apache.lucene.index.IndexReader;
075 import org.apache.lucene.search.BooleanQuery;
076 import org.apache.lucene.search.Explanation;
077 import org.apache.lucene.search.ScoreDoc;
078 import org.apache.lucene.search.SortField;
079 import org.apache.lucene.search.TopFieldDocs;
080
081
084 public class LuceneIndexSearcherImpl implements IndexSearcher {
085
086 @Override
087 public Hits search(SearchContext searchContext, Query query)
088 throws SearchException {
089
090 if (_log.isDebugEnabled()) {
091 _log.debug("Query " + query);
092 }
093
094 Hits hits = null;
095
096 org.apache.lucene.search.IndexSearcher indexSearcher = null;
097 Map<String, Facet> facets = null;
098 BoboBrowser boboBrowser = null;
099 BrowseRequest browseRequest = null;
100
101 try {
102 indexSearcher = LuceneHelperUtil.getSearcher(
103 searchContext.getCompanyId(), true);
104
105 List<FacetHandler<?>> facetHandlers =
106 new ArrayList<FacetHandler<?>>();
107
108 facets = searchContext.getFacets();
109
110 for (Facet facet : facets.values()) {
111 if (facet.isStatic()) {
112 continue;
113 }
114
115 FacetConfiguration facetConfiguration =
116 facet.getFacetConfiguration();
117
118 if (facet instanceof MultiValueFacet) {
119 MultiValueFacetHandler multiValueFacetHandler =
120 new MultiValueFacetHandler(
121 facetConfiguration.getFieldName(),
122 facetConfiguration.getFieldName());
123
124 JSONObject dataJSONObject = facetConfiguration.getData();
125
126 if (dataJSONObject.has("maxTerms")) {
127 multiValueFacetHandler.setMaxItems(
128 dataJSONObject.getInt("maxTerms"));
129 }
130
131 facetHandlers.add(multiValueFacetHandler);
132 }
133 else if (facet instanceof RangeFacet) {
134 List<String> ranges = new ArrayList<String>();
135
136 JSONObject dataJSONObject = facetConfiguration.getData();
137
138 JSONArray rangesJSONArray = dataJSONObject.getJSONArray(
139 "ranges");
140
141 if (rangesJSONArray != null) {
142 for (int i = 0; i < rangesJSONArray.length(); i++) {
143 JSONObject rangeJSONObject =
144 rangesJSONArray.getJSONObject(i);
145
146 ranges.add(rangeJSONObject.getString("range"));
147 }
148 }
149
150 RangeFacetHandler rangeFacetHandler = new RangeFacetHandler(
151 facetConfiguration.getFieldName(),
152 facetConfiguration.getFieldName(), ranges);
153
154 rangeFacetHandler.setTermCountSize(TermCountSize.large);
155
156 facetHandlers.add(rangeFacetHandler);
157 }
158 else if (facet instanceof SimpleFacet) {
159 SimpleFacetHandler simpleFacetHandler =
160 new SimpleFacetHandler(
161 facetConfiguration.getFieldName(),
162 facetConfiguration.getFieldName());
163
164 facetHandlers.add(simpleFacetHandler);
165 }
166 }
167
168 BoboIndexReader boboIndexReader = BoboIndexReader.getInstance(
169 indexSearcher.getIndexReader(), facetHandlers);
170
171 SortField[] sortFields = new SortField[0];
172
173 Sort[] sorts = searchContext.getSorts();
174
175 if (sorts != null) {
176 sortFields = new SortField[sorts.length];
177
178 for (int i = 0; i < sorts.length; i++) {
179 Sort sort = sorts[i];
180
181 sortFields[i] = new SortField(
182 sort.getFieldName(), sort.getType(), sort.isReverse());
183 }
184 }
185
186 browseRequest = new BrowseRequest();
187
188 for (Facet facet : facets.values()) {
189 if (facet.isStatic()) {
190 continue;
191 }
192
193 FacetConfiguration facetConfiguration =
194 facet.getFacetConfiguration();
195
196 FacetSpec facetSpec = new FacetSpec();
197
198 facetSpec.setOrderBy(
199 FacetSortSpec.valueOf(facetConfiguration.getOrder()));
200
201 browseRequest.setFacetSpec(facet.getFieldName(), facetSpec);
202 }
203
204 browseRequest.setCount(PropsValues.INDEX_SEARCH_LIMIT);
205 browseRequest.setOffset(0);
206 browseRequest.setQuery(
207 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
208 query));
209 browseRequest.setSort(sortFields);
210
211 boboBrowser = new BoboBrowser(boboIndexReader);
212
213 long startTime = System.currentTimeMillis();
214
215 BrowseResult browseResult = boboBrowser.browse(browseRequest);
216
217 BrowseHit[] browseHits = browseResult.getHits();
218
219 long endTime = System.currentTimeMillis();
220
221 float searchTime = (float)(endTime - startTime) / Time.SECOND;
222
223 hits = toHits(
224 indexSearcher, new HitDocs(browseHits), query, startTime,
225 searchTime, searchContext.getStart(), searchContext.getEnd());
226
227 Map<String, FacetAccessible> facetMap = browseResult.getFacetMap();
228
229 for (Map.Entry<String, FacetAccessible> entry :
230 facetMap.entrySet()) {
231
232 Facet facet = facets.get(entry.getKey());
233
234 FacetAccessible facetAccessible = entry.getValue();
235
236 FacetCollector facetCollector = new BoboFacetCollector(
237 entry.getKey(), facetAccessible);
238
239 facet.setFacetCollector(facetCollector);
240 }
241 }
242 catch (BooleanQuery.TooManyClauses tmc) {
243 int maxClauseCount = BooleanQuery.getMaxClauseCount();
244
245 BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
246
247 try {
248 long startTime = System.currentTimeMillis();
249
250 BrowseResult browseResult = boboBrowser.browse(browseRequest);
251
252 BrowseHit[] browseHits = browseResult.getHits();
253
254 long endTime = System.currentTimeMillis();
255
256 float searchTime = (float)(endTime - startTime) / Time.SECOND;
257
258 hits = toHits(
259 indexSearcher, new HitDocs(browseHits), query, startTime,
260 searchTime, searchContext.getStart(),
261 searchContext.getEnd());
262
263 Map<String, FacetAccessible> facetMap =
264 browseResult.getFacetMap();
265
266 for (Map.Entry<String, FacetAccessible> entry :
267 facetMap.entrySet()) {
268
269 Facet facet = facets.get(entry.getKey());
270
271 FacetAccessible facetAccessible = entry.getValue();
272
273 FacetCollector facetCollector = new BoboFacetCollector(
274 entry.getKey(), facetAccessible);
275
276 facet.setFacetCollector(facetCollector);
277 }
278 }
279 catch (Exception e) {
280 throw new SearchException(e);
281 }
282 finally {
283 BooleanQuery.setMaxClauseCount(maxClauseCount);
284 }
285 }
286 catch (ParseException pe) {
287 _log.error("Query " + query, pe);
288
289 return new HitsImpl();
290 }
291 catch (Exception e) {
292 throw new SearchException(e);
293 }
294 finally {
295 close(boboBrowser);
296
297 close(indexSearcher);
298 }
299
300 if (_log.isDebugEnabled()) {
301 _log.debug(
302 "Search found " + hits.getLength() + " results in " +
303 hits.getSearchTime() + "ms");
304 }
305
306 return hits;
307 }
308
309 @Override
310 public Hits search(
311 String searchEngineId, long companyId, Query query, Sort[] sorts,
312 int start, int end)
313 throws SearchException {
314
315 if (_log.isDebugEnabled()) {
316 _log.debug("Query " + query);
317 }
318
319 Hits hits = null;
320
321 org.apache.lucene.search.IndexSearcher indexSearcher = null;
322 org.apache.lucene.search.Sort luceneSort = null;
323
324 try {
325 indexSearcher = LuceneHelperUtil.getSearcher(companyId, true);
326
327 if (sorts != null) {
328 SortField[] sortFields = new SortField[sorts.length];
329
330 for (int i = 0; i < sorts.length; i++) {
331 Sort sort = sorts[i];
332
333 sortFields[i] = new SortField(
334 sort.getFieldName(), sort.getType(), sort.isReverse());
335 }
336
337 luceneSort = new org.apache.lucene.search.Sort(sortFields);
338 }
339 else {
340 luceneSort = new org.apache.lucene.search.Sort();
341 }
342
343 long startTime = System.currentTimeMillis();
344
345 TopFieldDocs topFieldDocs = indexSearcher.search(
346 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
347 query),
348 null, PropsValues.INDEX_SEARCH_LIMIT, luceneSort);
349
350 long endTime = System.currentTimeMillis();
351
352 float searchTime = (float)(endTime - startTime) / Time.SECOND;
353
354 hits = toHits(
355 indexSearcher, new HitDocs(topFieldDocs), query, startTime,
356 searchTime, start, end);
357 }
358 catch (BooleanQuery.TooManyClauses tmc) {
359 int maxClauseCount = BooleanQuery.getMaxClauseCount();
360
361 BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
362
363 try {
364 long startTime = System.currentTimeMillis();
365
366 TopFieldDocs topFieldDocs = indexSearcher.search(
367 (org.apache.lucene.search.Query)
368 QueryTranslatorUtil.translate(query),
369 null, PropsValues.INDEX_SEARCH_LIMIT, luceneSort);
370
371 long endTime = System.currentTimeMillis();
372
373 float searchTime = (float)(endTime - startTime) / Time.SECOND;
374
375 hits = toHits(
376 indexSearcher, new HitDocs(topFieldDocs), query, startTime,
377 searchTime, start, end);
378 }
379 catch (Exception e) {
380 throw new SearchException(e);
381 }
382 finally {
383 BooleanQuery.setMaxClauseCount(maxClauseCount);
384 }
385 }
386 catch (ParseException pe) {
387 _log.error("Query " + query, pe);
388
389 return new HitsImpl();
390 }
391 catch (Exception e) {
392 throw new SearchException(e);
393 }
394 finally {
395 close(indexSearcher);
396 }
397
398 if (_log.isDebugEnabled()) {
399 _log.debug(
400 "Search found " + hits.getLength() + " results in " +
401 hits.getSearchTime() + "ms");
402 }
403
404 return hits;
405 }
406
407 protected void close(BoboBrowser boboBrowser) {
408 if (boboBrowser == null) {
409 return;
410 }
411
412 try {
413 boboBrowser.close();
414 }
415 catch (IOException ioe) {
416 _log.error(ioe, ioe);
417 }
418
419 Browsable[] browsables = boboBrowser.getSubBrowsers();
420
421 for (Browsable browsable : browsables) {
422 if (!(browsable instanceof BoboSubBrowser)) {
423 continue;
424 }
425
426 BoboSubBrowser boboSubBrowser = (BoboSubBrowser)browsable;
427
428 BoboIndexReader boboIndexReader = boboSubBrowser.getIndexReader();
429
430 try {
431 ThreadLocal<?> threadLocal =
432 (ThreadLocal<?>)_runtimeFacetDataMapField.get(
433 boboIndexReader);
434
435 threadLocal.remove();
436
437 _runtimeFacetDataMapField.set(boboIndexReader, null);
438 }
439 catch (Exception e) {
440 _log.error(
441 "Unable to clean up BoboIndexReader#_runtimeFacetDataMap",
442 e);
443 }
444 }
445 }
446
447 protected void close(org.apache.lucene.search.IndexSearcher indexSearcher) {
448 if (indexSearcher == null) {
449 return;
450 }
451
452 try {
453 indexSearcher.close();
454
455 IndexReader indexReader = indexSearcher.getIndexReader();
456
457 if (indexReader != null) {
458 indexReader.close();
459 }
460 }
461 catch (IOException ioe) {
462 _log.error(ioe, ioe);
463 }
464 }
465
466 protected DocumentImpl getDocument(
467 org.apache.lucene.document.Document oldDocument) {
468
469 DocumentImpl newDocument = new DocumentImpl();
470
471 List<org.apache.lucene.document.Fieldable> oldFieldables =
472 oldDocument.getFields();
473
474 for (org.apache.lucene.document.Fieldable oldFieldable :
475 oldFieldables) {
476
477 Field newField = null;
478
479 String[] values = oldDocument.getValues(oldFieldable.name());
480
481 if ((values != null) && (values.length > 1)) {
482 newField = new Field(oldFieldable.name(), values);
483 }
484 else {
485 newField = new Field(
486 oldFieldable.name(), oldFieldable.stringValue());
487 }
488
489 newField.setNumeric(oldFieldable instanceof NumericField);
490 newField.setTokenized(oldFieldable.isTokenized());
491
492 newDocument.add(newField);
493 }
494
495 return newDocument;
496 }
497
498 protected String[] getQueryTerms(Query query) {
499 String[] queryTerms = new String[0];
500
501 try {
502 queryTerms = LuceneHelperUtil.getQueryTerms(
503 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
504 query));
505 }
506 catch (ParseException pe) {
507 _log.error("Query " + query, pe);
508 }
509
510 return queryTerms;
511 }
512
513 protected String getSnippet(
514 org.apache.lucene.document.Document doc, Query query, String field,
515 Locale locale)
516 throws IOException {
517
518 String localizedName = DocumentImpl.getLocalizedName(locale, field);
519
520 String[] values = doc.getValues(localizedName);
521
522 if ((values == null) || (values.length == 0)) {
523 values = doc.getValues(field);
524 }
525
526 String snippet = null;
527
528 if (Validator.isNull(values)) {
529 return snippet;
530 }
531
532 String s = StringUtil.merge(values);
533
534 try {
535 snippet = LuceneHelperUtil.getSnippet(
536 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
537 query),
538 field, s);
539 }
540 catch (ParseException pe) {
541 _log.error("Query " + query, pe);
542 }
543
544 return snippet;
545 }
546
547 protected Hits toHits(
548 org.apache.lucene.search.IndexSearcher indexSearcher,
549 HitDocs hitDocs, Query query, long startTime, float searchTime,
550 int start, int end)
551 throws IOException, ParseException {
552
553 int length = hitDocs.getTotalHits();
554
555 if ((start == QueryUtil.ALL_POS) && (end == QueryUtil.ALL_POS)) {
556 start = 0;
557 end = length;
558 }
559
560 String[] queryTerms = getQueryTerms(query);
561
562 IndexReader indexReader = indexSearcher.getIndexReader();
563
564 List<String> indexedFieldNames = new ArrayList<String> (
565 indexReader.getFieldNames(IndexReader.FieldOption.INDEXED));
566
567 org.apache.lucene.search.Query luceneQuery =
568 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
569 query);
570
571 int scoredFieldNamesCount = LuceneHelperUtil.countScoredFieldNames(
572 luceneQuery, ArrayUtil.toStringArray(indexedFieldNames.toArray()));
573
574 Hits hits = new HitsImpl();
575
576 if ((start > -1) && (start <= end)) {
577 if (end > length) {
578 end = length;
579 }
580
581 if (start > end) {
582 start = end;
583 }
584
585 int subsetTotal = end - start;
586
587 if (subsetTotal > PropsValues.INDEX_SEARCH_LIMIT) {
588 subsetTotal = PropsValues.INDEX_SEARCH_LIMIT;
589 }
590
591 List<Document> subsetDocs = new ArrayList<Document>(subsetTotal);
592 List<String> subsetSnippets = new ArrayList<String>(subsetTotal);
593 List<Float> subsetScores = new ArrayList<Float>(subsetTotal);
594
595 QueryConfig queryConfig = query.getQueryConfig();
596
597 for (int i = start; i < end; i++) {
598 if (i >= PropsValues.INDEX_SEARCH_LIMIT) {
599 break;
600 }
601
602 int docId = hitDocs.getDocId(i);
603
604 org.apache.lucene.document.Document document =
605 indexSearcher.doc(docId);
606
607 Document subsetDocument = getDocument(document);
608
609 String subsetSnippet = StringPool.BLANK;
610
611 if (queryConfig.isHighlightEnabled()) {
612 subsetSnippet = getSnippet(
613 document, query, Field.CONTENT,
614 queryConfig.getLocale());
615 }
616
617 subsetDocument.addText(Field.SNIPPET, subsetSnippet);
618
619 subsetSnippets.add(subsetSnippet);
620
621 subsetDocs.add(subsetDocument);
622
623 Float subsetScore = hitDocs.getScore(i);
624
625 if (scoredFieldNamesCount > 0) {
626 subsetScore = subsetScore / scoredFieldNamesCount;
627 }
628
629 subsetScores.add(subsetScore);
630
631 if (_log.isDebugEnabled()) {
632 try {
633 Explanation explanation = indexSearcher.explain(
634 luceneQuery, docId);
635
636 _log.debug(explanation.toString());
637 }
638 catch (Exception e) {
639 }
640 }
641 }
642
643 hits.setStart(startTime);
644 hits.setSearchTime(searchTime);
645 hits.setQuery(query);
646 hits.setQueryTerms(queryTerms);
647 hits.setDocs(subsetDocs.toArray(new Document[subsetDocs.size()]));
648 hits.setLength(length);
649 hits.setSnippets(
650 subsetSnippets.toArray(new String[subsetSnippets.size()]));
651 hits.setScores(
652 subsetScores.toArray(new Float[subsetScores.size()]));
653 }
654
655 return hits;
656 }
657
658 private static Log _log = LogFactoryUtil.getLog(
659 LuceneIndexSearcherImpl.class);
660
661 private static java.lang.reflect.Field _runtimeFacetDataMapField;
662
663 static {
664 try {
665 _runtimeFacetDataMapField = ReflectionUtil.getDeclaredField(
666 BoboIndexReader.class, "_runtimeFacetDataMap");
667 }
668 catch (Exception e) {
669 throw new ExceptionInInitializerError(e);
670 }
671 }
672
673 private class HitDocs {
674
675 public HitDocs(BrowseHit[] browseHits) {
676 _browseHits = browseHits;
677 }
678
679 public HitDocs(TopFieldDocs topFieldDocs) {
680 _topFieldDocs = topFieldDocs;
681 }
682
683 public int getDocId(int i) {
684 if (_topFieldDocs != null) {
685 ScoreDoc scoreDoc = _topFieldDocs.scoreDocs[i];
686
687 return scoreDoc.doc;
688 }
689 else if (_browseHits != null) {
690 return _browseHits[i].getDocid();
691 }
692
693 throw new IllegalStateException();
694 }
695
696 public float getScore(int i) {
697 if (_topFieldDocs != null) {
698 ScoreDoc scoreDoc = _topFieldDocs.scoreDocs[i];
699
700 return scoreDoc.score;
701 }
702 else if (_browseHits != null) {
703 return _browseHits[i].getScore();
704 }
705
706 throw new IllegalStateException();
707 }
708
709 public int getTotalHits() {
710 if (_topFieldDocs != null) {
711 return _topFieldDocs.totalHits;
712 }
713 else if (_browseHits != null) {
714 return _browseHits.length;
715 }
716
717 throw new IllegalStateException();
718 }
719
720 private BrowseHit[] _browseHits;
721 private TopFieldDocs _topFieldDocs;
722
723 }
724
725 }