001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.browseengine.bobo.api.BoboBrowser;
018 import com.browseengine.bobo.api.BoboIndexReader;
019 import com.browseengine.bobo.api.Browsable;
020 import com.browseengine.bobo.api.BrowseHit;
021 import com.browseengine.bobo.api.BrowseRequest;
022 import com.browseengine.bobo.api.BrowseResult;
023 import com.browseengine.bobo.api.FacetAccessible;
024 import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
025 import com.browseengine.bobo.api.FacetSpec;
026 import com.browseengine.bobo.facets.FacetHandler.TermCountSize;
027 import com.browseengine.bobo.facets.FacetHandler;
028 import com.browseengine.bobo.facets.impl.MultiValueFacetHandler;
029 import com.browseengine.bobo.facets.impl.RangeFacetHandler;
030 import com.browseengine.bobo.facets.impl.SimpleFacetHandler;
031
032 import com.liferay.portal.kernel.dao.orm.QueryUtil;
033 import com.liferay.portal.kernel.json.JSONArray;
034 import com.liferay.portal.kernel.json.JSONObject;
035 import com.liferay.portal.kernel.log.Log;
036 import com.liferay.portal.kernel.log.LogFactoryUtil;
037 import com.liferay.portal.kernel.search.Document;
038 import com.liferay.portal.kernel.search.DocumentImpl;
039 import com.liferay.portal.kernel.search.Field;
040 import com.liferay.portal.kernel.search.Hits;
041 import com.liferay.portal.kernel.search.HitsImpl;
042 import com.liferay.portal.kernel.search.IndexSearcher;
043 import com.liferay.portal.kernel.search.ParseException;
044 import com.liferay.portal.kernel.search.Query;
045 import com.liferay.portal.kernel.search.QueryConfig;
046 import com.liferay.portal.kernel.search.QueryTranslatorUtil;
047 import com.liferay.portal.kernel.search.SearchContext;
048 import com.liferay.portal.kernel.search.SearchException;
049 import com.liferay.portal.kernel.search.Sort;
050 import com.liferay.portal.kernel.search.facet.Facet;
051 import com.liferay.portal.kernel.search.facet.MultiValueFacet;
052 import com.liferay.portal.kernel.search.facet.RangeFacet;
053 import com.liferay.portal.kernel.search.facet.SimpleFacet;
054 import com.liferay.portal.kernel.search.facet.collector.FacetCollector;
055 import com.liferay.portal.kernel.search.facet.config.FacetConfiguration;
056 import com.liferay.portal.kernel.util.ArrayUtil;
057 import com.liferay.portal.kernel.util.StringPool;
058 import com.liferay.portal.kernel.util.StringUtil;
059 import com.liferay.portal.kernel.util.Time;
060 import com.liferay.portal.kernel.util.Validator;
061 import com.liferay.portal.search.BoboFacetCollector;
062 import com.liferay.portal.util.PropsValues;
063
064 import java.io.IOException;
065
066 import java.util.ArrayList;
067 import java.util.HashSet;
068 import java.util.List;
069 import java.util.Locale;
070 import java.util.Map;
071 import java.util.Set;
072
073 import org.apache.lucene.document.NumericField;
074 import org.apache.lucene.index.IndexReader;
075 import org.apache.lucene.search.BooleanQuery;
076 import org.apache.lucene.search.Explanation;
077 import org.apache.lucene.search.ScoreDoc;
078 import org.apache.lucene.search.SortField;
079 import org.apache.lucene.search.TopFieldDocs;
080 import org.apache.lucene.search.highlight.Formatter;
081 import org.apache.lucene.search.highlight.TokenGroup;
082
083
086 public class LuceneIndexSearcherImpl implements IndexSearcher {
087
088 public Hits search(SearchContext searchContext, Query query)
089 throws SearchException {
090
091 if (_log.isDebugEnabled()) {
092 _log.debug("Query " + query);
093 }
094
095 Hits hits = null;
096
097 org.apache.lucene.search.IndexSearcher indexSearcher = null;
098 Map<String, Facet> facets = null;
099 BrowseRequest browseRequest = null;
100 Browsable browsable = null;
101
102 try {
103 indexSearcher = LuceneHelperUtil.getSearcher(
104 searchContext.getCompanyId(), true);
105
106 List<FacetHandler<?>> facetHandlers =
107 new ArrayList<FacetHandler<?>>();
108
109 facets = searchContext.getFacets();
110
111 for (Facet facet : facets.values()) {
112 if (facet.isStatic()) {
113 continue;
114 }
115
116 FacetConfiguration facetConfiguration =
117 facet.getFacetConfiguration();
118
119 if (facet instanceof MultiValueFacet) {
120 MultiValueFacetHandler multiValueFacetHandler =
121 new MultiValueFacetHandler(
122 facetConfiguration.getFieldName(),
123 facetConfiguration.getFieldName());
124
125 JSONObject dataJSONObject = facetConfiguration.getData();
126
127 if (dataJSONObject.has("maxTerms")) {
128 multiValueFacetHandler.setMaxItems(
129 dataJSONObject.getInt("maxTerms"));
130 }
131
132 facetHandlers.add(multiValueFacetHandler);
133 }
134 else if (facet instanceof RangeFacet) {
135 List<String> ranges = new ArrayList<String>();
136
137 JSONObject dataJSONObject = facetConfiguration.getData();
138
139 JSONArray rangesJSONArray = dataJSONObject.getJSONArray(
140 "ranges");
141
142 if (rangesJSONArray != null) {
143 for (int i = 0; i < rangesJSONArray.length(); i++) {
144 JSONObject rangeJSONObject =
145 rangesJSONArray.getJSONObject(i);
146
147 ranges.add(rangeJSONObject.getString("range"));
148 }
149 }
150
151 RangeFacetHandler rangeFacetHandler = new RangeFacetHandler(
152 facetConfiguration.getFieldName(),
153 facetConfiguration.getFieldName(), ranges);
154
155 rangeFacetHandler.setTermCountSize(TermCountSize.large);
156
157 facetHandlers.add(rangeFacetHandler);
158 }
159 else if (facet instanceof SimpleFacet) {
160 SimpleFacetHandler simpleFacetHandler =
161 new SimpleFacetHandler(
162 facetConfiguration.getFieldName(),
163 facetConfiguration.getFieldName());
164
165 facetHandlers.add(simpleFacetHandler);
166 }
167 }
168
169 BoboIndexReader boboIndexReader = BoboIndexReader.getInstance(
170 indexSearcher.getIndexReader(), facetHandlers);
171
172 SortField[] sortFields = new SortField[0];
173
174 Sort[] sorts = searchContext.getSorts();
175
176 if (sorts != null) {
177 sortFields = new SortField[sorts.length];
178
179 for (int i = 0; i < sorts.length; i++) {
180 Sort sort = sorts[i];
181
182 sortFields[i] = new SortField(
183 sort.getFieldName(), sort.getType(), sort.isReverse());
184 }
185 }
186
187 browseRequest = new BrowseRequest();
188
189 for (Facet facet : facets.values()) {
190 if (facet.isStatic()) {
191 continue;
192 }
193
194 FacetConfiguration facetConfiguration =
195 facet.getFacetConfiguration();
196
197 FacetSpec facetSpec = new FacetSpec();
198
199 facetSpec.setOrderBy(
200 FacetSortSpec.valueOf(facetConfiguration.getOrder()));
201
202 browseRequest.setFacetSpec(facet.getFieldName(), facetSpec);
203 }
204
205 browseRequest.setCount(PropsValues.INDEX_SEARCH_LIMIT);
206 browseRequest.setOffset(0);
207 browseRequest.setQuery(
208 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
209 query));
210 browseRequest.setSort(sortFields);
211
212 browsable = new BoboBrowser(boboIndexReader);
213
214 long startTime = System.currentTimeMillis();
215
216 BrowseResult browseResult = browsable.browse(browseRequest);
217
218 BrowseHit[] browseHits = browseResult.getHits();
219
220 long endTime = System.currentTimeMillis();
221
222 float searchTime = (float)(endTime - startTime) / Time.SECOND;
223
224 hits = toHits(
225 indexSearcher, new HitDocs(browseHits), query, startTime,
226 searchTime, searchContext.getStart(), searchContext.getEnd());
227
228 Map<String, FacetAccessible> facetMap = browseResult.getFacetMap();
229
230 for (Map.Entry<String, FacetAccessible> entry :
231 facetMap.entrySet()) {
232
233 Facet facet = facets.get(entry.getKey());
234
235 FacetAccessible facetAccessible = entry.getValue();
236
237 FacetCollector facetCollector = new BoboFacetCollector(
238 entry.getKey(), facetAccessible);
239
240 facet.setFacetCollector(facetCollector);
241 }
242 }
243 catch (BooleanQuery.TooManyClauses tmc) {
244 int maxClauseCount = BooleanQuery.getMaxClauseCount();
245
246 BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
247
248 try {
249 long startTime = System.currentTimeMillis();
250
251 BrowseResult result = browsable.browse(browseRequest);
252
253 BrowseHit[] browseHits = result.getHits();
254
255 long endTime = System.currentTimeMillis();
256
257 float searchTime = (float)(endTime - startTime) / Time.SECOND;
258
259 hits = toHits(
260 indexSearcher, new HitDocs(browseHits), query, startTime,
261 searchTime, searchContext.getStart(),
262 searchContext.getEnd());
263
264 Map<String, FacetAccessible> facetMap = result.getFacetMap();
265
266 for (Map.Entry<String, FacetAccessible> entry :
267 facetMap.entrySet()) {
268
269 Facet facet = facets.get(entry.getKey());
270
271 FacetAccessible facetAccessible = entry.getValue();
272
273 FacetCollector facetCollector = new BoboFacetCollector(
274 entry.getKey(), facetAccessible);
275
276 facet.setFacetCollector(facetCollector);
277 }
278 }
279 catch (Exception e) {
280 throw new SearchException(e);
281 }
282 finally {
283 BooleanQuery.setMaxClauseCount(maxClauseCount);
284 }
285 }
286 catch (ParseException pe) {
287 _log.error("Query " + query, pe);
288
289 return new HitsImpl();
290 }
291 catch (Exception e) {
292 throw new SearchException(e);
293 }
294 finally {
295 close(browsable);
296
297 if (indexSearcher != null) {
298 try {
299 indexSearcher.close();
300 }
301 catch (IOException ioe) {
302 _log.error(ioe, ioe);
303 }
304 }
305 }
306
307 if (_log.isDebugEnabled()) {
308 _log.debug(
309 "Search found " + hits.getLength() + " results in " +
310 hits.getSearchTime() + "ms");
311 }
312
313 return hits;
314 }
315
316 public Hits search(
317 String searchEngineId, long companyId, Query query, Sort[] sorts,
318 int start, int end)
319 throws SearchException {
320
321 if (_log.isDebugEnabled()) {
322 _log.debug("Query " + query);
323 }
324
325 Hits hits = null;
326
327 org.apache.lucene.search.IndexSearcher indexSearcher = null;
328 org.apache.lucene.search.Sort luceneSort = null;
329
330 try {
331 indexSearcher = LuceneHelperUtil.getSearcher(companyId, true);
332
333 if (sorts != null) {
334 SortField[] sortFields = new SortField[sorts.length];
335
336 for (int i = 0; i < sorts.length; i++) {
337 Sort sort = sorts[i];
338
339 sortFields[i] = new SortField(
340 sort.getFieldName(), sort.getType(), sort.isReverse());
341 }
342
343 luceneSort = new org.apache.lucene.search.Sort(sortFields);
344 }
345 else {
346 luceneSort = new org.apache.lucene.search.Sort();
347 }
348
349 long startTime = System.currentTimeMillis();
350
351 TopFieldDocs topFieldDocs = indexSearcher.search(
352 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
353 query),
354 null, PropsValues.INDEX_SEARCH_LIMIT, luceneSort);
355
356 long endTime = System.currentTimeMillis();
357
358 float searchTime = (float)(endTime - startTime) / Time.SECOND;
359
360 hits = toHits(
361 indexSearcher, new HitDocs(topFieldDocs), query, startTime,
362 searchTime, start, end);
363 }
364 catch (BooleanQuery.TooManyClauses tmc) {
365 int maxClauseCount = BooleanQuery.getMaxClauseCount();
366
367 BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
368
369 try {
370 long startTime = System.currentTimeMillis();
371
372 TopFieldDocs topFieldDocs = indexSearcher.search(
373 (org.apache.lucene.search.Query)
374 QueryTranslatorUtil.translate(query),
375 null, PropsValues.INDEX_SEARCH_LIMIT, luceneSort);
376
377 long endTime = System.currentTimeMillis();
378
379 float searchTime = (float)(endTime - startTime) / Time.SECOND;
380
381 hits = toHits(
382 indexSearcher, new HitDocs(topFieldDocs), query, startTime,
383 searchTime, start, end);
384 }
385 catch (Exception e) {
386 throw new SearchException(e);
387 }
388 finally {
389 BooleanQuery.setMaxClauseCount(maxClauseCount);
390 }
391 }
392 catch (ParseException pe) {
393 _log.error("Query " + query, pe);
394
395 return new HitsImpl();
396 }
397 catch (Exception e) {
398 throw new SearchException(e);
399 }
400 finally {
401 if (indexSearcher != null) {
402 try {
403 indexSearcher.close();
404 }
405 catch (IOException ioe) {
406 _log.error(ioe, ioe);
407 }
408 }
409 }
410
411 if (_log.isDebugEnabled()) {
412 _log.debug(
413 "Search found " + hits.getLength() + " results in " +
414 hits.getSearchTime() + "ms");
415 }
416
417 return hits;
418 }
419
420 @SuppressWarnings("deprecation")
421 protected void close(Browsable browsable) {
422 if (browsable != null) {
423 try {
424 browsable.close();
425 }
426 catch (IOException ioe) {
427 _log.error(ioe, ioe);
428 }
429 }
430 }
431
432 protected DocumentImpl getDocument(
433 org.apache.lucene.document.Document oldDocument) {
434
435 DocumentImpl newDocument = new DocumentImpl();
436
437 List<org.apache.lucene.document.Fieldable> oldFieldables =
438 oldDocument.getFields();
439
440 for (org.apache.lucene.document.Fieldable oldFieldable :
441 oldFieldables) {
442
443 Field newField = null;
444
445 String[] values = oldDocument.getValues(oldFieldable.name());
446
447 if ((values != null) && (values.length > 1)) {
448 newField = new Field(oldFieldable.name(), values);
449 }
450 else {
451 newField = new Field(
452 oldFieldable.name(), oldFieldable.stringValue());
453 }
454
455 newField.setNumeric(oldFieldable instanceof NumericField);
456 newField.setTokenized(oldFieldable.isTokenized());
457
458 newDocument.add(newField);
459 }
460
461 return newDocument;
462 }
463
464 protected Set<String> getQueryTerms(Query query) {
465 Set<String> queryTerms = new HashSet<String>();
466
467 try {
468 queryTerms = LuceneHelperUtil.getQueryTerms(
469 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
470 query));
471 }
472 catch (ParseException pe) {
473 _log.error("Query " + query, pe);
474 }
475
476 return queryTerms;
477 }
478
479 protected String getSnippet(
480 org.apache.lucene.document.Document doc, Query query, String field,
481 Locale locale, Document hitDoc, Set<String> matchingTerms)
482 throws IOException {
483
484 String snippetField = DocumentImpl.getLocalizedName(locale, field);
485 String snippet = null;
486
487 try {
488 org.apache.lucene.search.Query luceneQuery =
489 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
490 query);
491
492 String[] values = doc.getValues(snippetField);
493
494 TermCollectingFormatter termCollectingFormatter =
495 new TermCollectingFormatter();
496
497 if ((values != null) && (values.length > 0)) {
498 snippet = LuceneHelperUtil.getSnippet(
499 luceneQuery, snippetField, StringUtil.merge(values),
500 termCollectingFormatter);
501 }
502
503 if ((values == null) || (values.length == 0) ||
504 Validator.isNull(snippet)) {
505
506 snippetField = field;
507
508 values = doc.getValues(snippetField);
509
510 if (Validator.isNull(values)) {
511 return StringPool.BLANK;
512 }
513
514 snippet = LuceneHelperUtil.getSnippet(
515 luceneQuery, field, StringUtil.merge(values),
516 termCollectingFormatter);
517 }
518
519 if (Validator.isNull(snippet)) {
520 return StringPool.BLANK;
521 }
522
523 matchingTerms.addAll(termCollectingFormatter.getTerms());
524 }
525 catch (ParseException pe) {
526 _log.error("Query " + query, pe);
527 }
528
529 hitDoc.addText(
530 Field.SNIPPET.concat(StringPool.UNDERLINE).concat(snippetField),
531 snippet);
532
533 return snippet;
534 }
535
536 protected Hits toHits(
537 org.apache.lucene.search.IndexSearcher indexSearcher,
538 HitDocs hitDocs, Query query, long startTime, float searchTime,
539 int start, int end)
540 throws IOException, ParseException {
541
542 int length = hitDocs.getTotalHits();
543
544 if ((start == QueryUtil.ALL_POS) && (end == QueryUtil.ALL_POS)) {
545 start = 0;
546 end = length;
547 }
548
549 Set<String> queryTerms = new HashSet<String>();
550
551 IndexReader indexReader = indexSearcher.getIndexReader();
552
553 List<String> indexedFieldNames = new ArrayList<String> (
554 indexReader.getFieldNames(IndexReader.FieldOption.INDEXED));
555
556 org.apache.lucene.search.Query luceneQuery =
557 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
558 query);
559
560 int scoredFieldNamesCount = LuceneHelperUtil.countScoredFieldNames(
561 luceneQuery, ArrayUtil.toStringArray(indexedFieldNames.toArray()));
562
563 Hits hits = new HitsImpl();
564
565 if ((start > -1) && (start <= end)) {
566 if (end > length) {
567 end = length;
568 }
569
570 if (start > end) {
571 start = end;
572 }
573
574 int subsetTotal = end - start;
575
576 if (subsetTotal > PropsValues.INDEX_SEARCH_LIMIT) {
577 subsetTotal = PropsValues.INDEX_SEARCH_LIMIT;
578 }
579
580 List<Document> subsetDocs = new ArrayList<Document>(subsetTotal);
581 List<Float> subsetScores = new ArrayList<Float>(subsetTotal);
582
583 QueryConfig queryConfig = query.getQueryConfig();
584
585 for (int i = start; i < start + subsetTotal; i++) {
586 int docId = hitDocs.getDocId(i);
587
588 org.apache.lucene.document.Document document =
589 indexSearcher.doc(docId);
590
591 Document subsetDocument = getDocument(document);
592
593 if (queryConfig.isHighlightEnabled()) {
594 Locale locale = queryConfig.getLocale();
595
596 getSnippet(
597 document, query, Field.CONTENT, locale, subsetDocument,
598 queryTerms);
599 getSnippet(
600 document, query, Field.DESCRIPTION, locale,
601 subsetDocument, queryTerms);
602 getSnippet(
603 document, query, Field.TITLE, locale, subsetDocument,
604 queryTerms);
605 }
606
607 subsetDocs.add(subsetDocument);
608
609 Float subsetScore = hitDocs.getScore(i);
610
611 if (scoredFieldNamesCount > 0) {
612 subsetScore = subsetScore / scoredFieldNamesCount;
613 }
614
615 subsetScores.add(subsetScore);
616
617 if (_log.isDebugEnabled()) {
618 try {
619 Explanation explanation = indexSearcher.explain(
620 luceneQuery, docId);
621
622 _log.debug(explanation.toString());
623 }
624 catch (Exception e) {
625 }
626 }
627 }
628
629 if (!queryConfig.isHighlightEnabled()) {
630 queryTerms = getQueryTerms(query);
631 }
632
633 hits.setDocs(subsetDocs.toArray(new Document[subsetDocs.size()]));
634 hits.setLength(length);
635 hits.setQuery(query);
636 hits.setQueryTerms(
637 queryTerms.toArray(new String[queryTerms.size()]));
638 hits.setScores(
639 subsetScores.toArray(new Float[subsetScores.size()]));
640 hits.setSearchTime(searchTime);
641 hits.setStart(startTime);
642 }
643
644 return hits;
645 }
646
647 private static Log _log = LogFactoryUtil.getLog(
648 LuceneIndexSearcherImpl.class);
649
650 private class HitDocs {
651
652 public HitDocs(BrowseHit[] browseHits) {
653 _browseHits = browseHits;
654 }
655
656 public HitDocs(TopFieldDocs topFieldDocs) {
657 _topFieldDocs = topFieldDocs;
658 }
659
660 public int getDocId(int i) {
661 if (_topFieldDocs != null) {
662 ScoreDoc scoreDoc = _topFieldDocs.scoreDocs[i];
663
664 return scoreDoc.doc;
665 }
666 else if (_browseHits != null) {
667 return _browseHits[i].getDocid();
668 }
669
670 throw new IllegalStateException();
671 }
672
673 public float getScore(int i) {
674 if (_topFieldDocs != null) {
675 ScoreDoc scoreDoc = _topFieldDocs.scoreDocs[i];
676
677 return scoreDoc.score;
678 }
679 else if (_browseHits != null) {
680 return _browseHits[i].getScore();
681 }
682
683 throw new IllegalStateException();
684 }
685
686 public int getTotalHits() {
687 if (_topFieldDocs != null) {
688 return _topFieldDocs.totalHits;
689 }
690 else if (_browseHits != null) {
691 return _browseHits.length;
692 }
693
694 throw new IllegalStateException();
695 }
696
697 private BrowseHit[] _browseHits;
698 private TopFieldDocs _topFieldDocs;
699
700 }
701
702 private class TermCollectingFormatter implements Formatter {
703
704 public Set<String> getTerms() {
705 return _terms;
706 }
707
708 public String highlightTerm(
709 String originalText, TokenGroup tokenGroup) {
710
711 if (tokenGroup.getTotalScore() > 0) {
712 _terms.add(originalText);
713 }
714
715 return originalText;
716 }
717
718 private Set<String> _terms = new HashSet<String>();
719
720 }
721
722 }