001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.browseengine.bobo.api.BoboBrowser;
018 import com.browseengine.bobo.api.BoboIndexReader;
019 import com.browseengine.bobo.api.Browsable;
020 import com.browseengine.bobo.api.BrowseHit;
021 import com.browseengine.bobo.api.BrowseRequest;
022 import com.browseengine.bobo.api.BrowseResult;
023 import com.browseengine.bobo.api.FacetAccessible;
024 import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
025 import com.browseengine.bobo.api.FacetSpec;
026 import com.browseengine.bobo.facets.FacetHandler.TermCountSize;
027 import com.browseengine.bobo.facets.FacetHandler;
028 import com.browseengine.bobo.facets.impl.MultiValueFacetHandler;
029 import com.browseengine.bobo.facets.impl.RangeFacetHandler;
030 import com.browseengine.bobo.facets.impl.SimpleFacetHandler;
031
032 import com.liferay.portal.kernel.dao.orm.QueryUtil;
033 import com.liferay.portal.kernel.json.JSONArray;
034 import com.liferay.portal.kernel.json.JSONObject;
035 import com.liferay.portal.kernel.log.Log;
036 import com.liferay.portal.kernel.log.LogFactoryUtil;
037 import com.liferay.portal.kernel.search.Document;
038 import com.liferay.portal.kernel.search.DocumentImpl;
039 import com.liferay.portal.kernel.search.Field;
040 import com.liferay.portal.kernel.search.Hits;
041 import com.liferay.portal.kernel.search.HitsImpl;
042 import com.liferay.portal.kernel.search.IndexSearcher;
043 import com.liferay.portal.kernel.search.ParseException;
044 import com.liferay.portal.kernel.search.Query;
045 import com.liferay.portal.kernel.search.QueryConfig;
046 import com.liferay.portal.kernel.search.QueryTranslatorUtil;
047 import com.liferay.portal.kernel.search.SearchContext;
048 import com.liferay.portal.kernel.search.SearchException;
049 import com.liferay.portal.kernel.search.Sort;
050 import com.liferay.portal.kernel.search.facet.Facet;
051 import com.liferay.portal.kernel.search.facet.MultiValueFacet;
052 import com.liferay.portal.kernel.search.facet.RangeFacet;
053 import com.liferay.portal.kernel.search.facet.SimpleFacet;
054 import com.liferay.portal.kernel.search.facet.collector.FacetCollector;
055 import com.liferay.portal.kernel.search.facet.config.FacetConfiguration;
056 import com.liferay.portal.kernel.util.ArrayUtil;
057 import com.liferay.portal.kernel.util.StringPool;
058 import com.liferay.portal.kernel.util.StringUtil;
059 import com.liferay.portal.kernel.util.Time;
060 import com.liferay.portal.kernel.util.Validator;
061 import com.liferay.portal.search.BoboFacetCollector;
062 import com.liferay.portal.util.PropsValues;
063
064 import java.io.IOException;
065
066 import java.util.ArrayList;
067 import java.util.List;
068 import java.util.Locale;
069 import java.util.Map;
070
071 import org.apache.lucene.document.NumericField;
072 import org.apache.lucene.index.IndexReader;
073 import org.apache.lucene.search.BooleanQuery;
074 import org.apache.lucene.search.Explanation;
075 import org.apache.lucene.search.ScoreDoc;
076 import org.apache.lucene.search.SortField;
077 import org.apache.lucene.search.TopFieldDocs;
078
079
082 public class LuceneIndexSearcherImpl implements IndexSearcher {
083
084 public Hits search(
085 String searchEngineId, long companyId, Query query, Sort[] sorts,
086 int start, int end)
087 throws SearchException {
088
089 if (_log.isDebugEnabled()) {
090 _log.debug("Query " + query);
091 }
092
093 Hits hits = null;
094
095 org.apache.lucene.search.IndexSearcher indexSearcher = null;
096 org.apache.lucene.search.Sort luceneSort = null;
097
098 try {
099 indexSearcher = LuceneHelperUtil.getSearcher(companyId, true);
100
101 if (sorts != null) {
102 SortField[] sortFields = new SortField[sorts.length];
103
104 for (int i = 0; i < sorts.length; i++) {
105 Sort sort = sorts[i];
106
107 sortFields[i] = new SortField(
108 sort.getFieldName(), sort.getType(), sort.isReverse());
109 }
110
111 luceneSort = new org.apache.lucene.search.Sort(sortFields);
112 }
113 else {
114 luceneSort = new org.apache.lucene.search.Sort();
115 }
116
117 long startTime = System.currentTimeMillis();
118
119 TopFieldDocs topFieldDocs = indexSearcher.search(
120 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
121 query),
122 null, PropsValues.INDEX_SEARCH_LIMIT, luceneSort);
123
124 long endTime = System.currentTimeMillis();
125
126 float searchTime = (float)(endTime - startTime) / Time.SECOND;
127
128 hits = toHits(
129 indexSearcher, new HitDocs(topFieldDocs), query, startTime,
130 searchTime, start, end);
131 }
132 catch (BooleanQuery.TooManyClauses tmc) {
133 int maxClauseCount = BooleanQuery.getMaxClauseCount();
134
135 BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
136
137 try {
138 long startTime = System.currentTimeMillis();
139
140 TopFieldDocs topFieldDocs = indexSearcher.search(
141 (org.apache.lucene.search.Query)
142 QueryTranslatorUtil.translate(query),
143 null, PropsValues.INDEX_SEARCH_LIMIT, luceneSort);
144
145 long endTime = System.currentTimeMillis();
146
147 float searchTime = (float)(endTime - startTime) / Time.SECOND;
148
149 hits = toHits(
150 indexSearcher, new HitDocs(topFieldDocs), query, startTime,
151 searchTime, start, end);
152 }
153 catch (Exception e) {
154 throw new SearchException(e);
155 }
156 finally {
157 BooleanQuery.setMaxClauseCount(maxClauseCount);
158 }
159 }
160 catch (ParseException pe) {
161 _log.error("Query " + query, pe);
162
163 return new HitsImpl();
164 }
165 catch (Exception e) {
166 throw new SearchException(e);
167 }
168 finally {
169 if (indexSearcher != null) {
170 try {
171 indexSearcher.close();
172 }
173 catch (IOException ioe) {
174 _log.error(ioe, ioe);
175 }
176 }
177 }
178
179 if (_log.isDebugEnabled()) {
180 _log.debug(
181 "Search found " + hits.getLength() + " results in " +
182 hits.getSearchTime() + "ms");
183 }
184
185 return hits;
186 }
187
188 public Hits search(SearchContext searchContext, Query query)
189 throws SearchException {
190
191 if (_log.isDebugEnabled()) {
192 _log.debug("Query " + query);
193 }
194
195 Hits hits = null;
196
197 org.apache.lucene.search.IndexSearcher indexSearcher = null;
198 Map<String, Facet> facets = null;
199 BrowseRequest browseRequest = null;
200 Browsable browsable = null;
201
202 try {
203 indexSearcher = LuceneHelperUtil.getSearcher(
204 searchContext.getCompanyId(), true);
205
206 List<FacetHandler<?>> facetHandlers =
207 new ArrayList<FacetHandler<?>>();
208
209 facets = searchContext.getFacets();
210
211 for (Facet facet : facets.values()) {
212 if (facet.isStatic()) {
213 continue;
214 }
215
216 FacetConfiguration facetConfiguration =
217 facet.getFacetConfiguration();
218
219 if (facet instanceof MultiValueFacet) {
220 MultiValueFacetHandler multiValueFacetHandler =
221 new MultiValueFacetHandler(
222 facetConfiguration.getFieldName(),
223 facetConfiguration.getFieldName());
224
225 JSONObject dataJSONObject = facetConfiguration.getData();
226
227 if (dataJSONObject.has("maxTerms")) {
228 multiValueFacetHandler.setMaxItems(
229 dataJSONObject.getInt("maxTerms"));
230 }
231
232 facetHandlers.add(multiValueFacetHandler);
233 }
234 else if (facet instanceof RangeFacet) {
235 List<String> ranges = new ArrayList<String>();
236
237 JSONObject dataJSONObject = facetConfiguration.getData();
238
239 JSONArray rangesJSONArray = dataJSONObject.getJSONArray(
240 "ranges");
241
242 if (rangesJSONArray != null) {
243 for (int i = 0; i < rangesJSONArray.length(); i++) {
244 JSONObject rangeJSONObject =
245 rangesJSONArray.getJSONObject(i);
246
247 ranges.add(rangeJSONObject.getString("range"));
248 }
249 }
250
251 RangeFacetHandler rangeFacetHandler =
252 new RangeFacetHandler(
253 facetConfiguration.getFieldName(),
254 facetConfiguration.getFieldName(), ranges);
255
256 rangeFacetHandler.setTermCountSize(TermCountSize.large);
257
258 facetHandlers.add(rangeFacetHandler);
259 }
260 else if (facet instanceof SimpleFacet) {
261 SimpleFacetHandler simpleFacetHandler =
262 new SimpleFacetHandler(
263 facetConfiguration.getFieldName(),
264 facetConfiguration.getFieldName());
265
266 facetHandlers.add(simpleFacetHandler);
267 }
268 }
269
270 BoboIndexReader boboIndexReader = BoboIndexReader.getInstance(
271 indexSearcher.getIndexReader(), facetHandlers);
272
273 SortField[] sortFields = new SortField[0];
274
275 Sort[] sorts = searchContext.getSorts();
276
277 if (sorts != null) {
278 sortFields = new SortField[sorts.length];
279
280 for (int i = 0; i < sorts.length; i++) {
281 Sort sort = sorts[i];
282
283 sortFields[i] = new SortField(
284 sort.getFieldName(), sort.getType(), sort.isReverse());
285 }
286 }
287
288 browseRequest = new BrowseRequest();
289
290 for (Facet facet : facets.values()) {
291 if (facet.isStatic()) {
292 continue;
293 }
294
295 FacetConfiguration facetConfiguration =
296 facet.getFacetConfiguration();
297
298 FacetSpec facetSpec = new FacetSpec();
299
300 facetSpec.setOrderBy(
301 FacetSortSpec.valueOf(facetConfiguration.getOrder()));
302
303 browseRequest.setFacetSpec(facet.getFieldName(), facetSpec);
304 }
305
306 browseRequest.setCount(PropsValues.INDEX_SEARCH_LIMIT);
307 browseRequest.setOffset(0);
308 browseRequest.setQuery(
309 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
310 query));
311 browseRequest.setSort(sortFields);
312
313 browsable = new BoboBrowser(boboIndexReader);
314
315 long startTime = System.currentTimeMillis();
316
317 BrowseResult browseResult = browsable.browse(browseRequest);
318
319 BrowseHit[] browseHits = browseResult.getHits();
320
321 long endTime = System.currentTimeMillis();
322
323 float searchTime = (float)(endTime - startTime) / Time.SECOND;
324
325 hits = toHits(
326 indexSearcher, new HitDocs(browseHits), query, startTime,
327 searchTime, searchContext.getStart(), searchContext.getEnd());
328
329 Map<String, FacetAccessible> facetMap = browseResult.getFacetMap();
330
331 for (Map.Entry<String, FacetAccessible> entry :
332 facetMap.entrySet()) {
333
334 Facet facet = facets.get(entry.getKey());
335
336 FacetAccessible facetAccessible = entry.getValue();
337
338 FacetCollector facetCollector = new BoboFacetCollector(
339 entry.getKey(), facetAccessible);
340
341 facet.setFacetCollector(facetCollector);
342 }
343 }
344 catch (BooleanQuery.TooManyClauses tmc) {
345 int maxClauseCount = BooleanQuery.getMaxClauseCount();
346
347 BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
348
349 try {
350 long startTime = System.currentTimeMillis();
351
352 BrowseResult result = browsable.browse(browseRequest);
353
354 BrowseHit[] browseHits = result.getHits();
355
356 long endTime = System.currentTimeMillis();
357
358 float searchTime = (float)(endTime - startTime) / Time.SECOND;
359
360 hits = toHits(
361 indexSearcher, new HitDocs(browseHits), query, startTime,
362 searchTime, searchContext.getStart(),
363 searchContext.getEnd());
364
365 Map<String, FacetAccessible> facetMap = result.getFacetMap();
366
367 for (Map.Entry<String, FacetAccessible> entry :
368 facetMap.entrySet()) {
369
370 Facet facet = facets.get(entry.getKey());
371
372 FacetAccessible facetAccessible = entry.getValue();
373
374 FacetCollector facetCollector = new BoboFacetCollector(
375 entry.getKey(), facetAccessible);
376
377 facet.setFacetCollector(facetCollector);
378 }
379 }
380 catch (Exception e) {
381 throw new SearchException(e);
382 }
383 finally {
384 BooleanQuery.setMaxClauseCount(maxClauseCount);
385 }
386 }
387 catch (ParseException pe) {
388 _log.error("Query " + query, pe);
389
390 return new HitsImpl();
391 }
392 catch (Exception e) {
393 throw new SearchException(e);
394 }
395 finally {
396 if (browsable != null) {
397 try {
398 browsable.close();
399 }
400 catch (IOException ioe) {
401 _log.error(ioe, ioe);
402 }
403 }
404
405 if (indexSearcher != null) {
406 try {
407 indexSearcher.close();
408 }
409 catch (IOException ioe) {
410 _log.error(ioe, ioe);
411 }
412 }
413 }
414
415 if (_log.isDebugEnabled()) {
416 _log.debug(
417 "Search found " + hits.getLength() + " results in " +
418 hits.getSearchTime() + "ms");
419 }
420
421 return hits;
422 }
423
424 protected DocumentImpl getDocument(
425 org.apache.lucene.document.Document oldDocument) {
426
427 DocumentImpl newDocument = new DocumentImpl();
428
429 List<org.apache.lucene.document.Fieldable> oldFieldables =
430 oldDocument.getFields();
431
432 for (org.apache.lucene.document.Fieldable oldFieldable :
433 oldFieldables) {
434
435 Field newField = null;
436
437 String[] values = oldDocument.getValues(oldFieldable.name());
438
439 if ((values != null) && (values.length > 1)) {
440 newField = new Field(oldFieldable.name(), values);
441 }
442 else {
443 newField = new Field(
444 oldFieldable.name(), oldFieldable.stringValue());
445 }
446
447 newField.setNumeric(oldFieldable instanceof NumericField);
448 newField.setTokenized(oldFieldable.isTokenized());
449
450 newDocument.add(newField);
451 }
452
453 return newDocument;
454 }
455
456 protected String[] getQueryTerms(Query query) {
457 String[] queryTerms = new String[0];
458
459 try {
460 queryTerms = LuceneHelperUtil.getQueryTerms(
461 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
462 query));
463 }
464 catch (ParseException pe) {
465 _log.error("Query " + query, pe);
466 }
467
468 return queryTerms;
469 }
470
471 protected String getSnippet(
472 org.apache.lucene.document.Document doc, Query query, String field,
473 Locale locale)
474 throws IOException {
475
476 String localizedName = DocumentImpl.getLocalizedName(locale, field);
477
478 String[] values = doc.getValues(localizedName);
479
480 if ((values == null) || (values.length == 0)) {
481 values = doc.getValues(field);
482 }
483
484 String snippet = null;
485
486 if (Validator.isNull(values)) {
487 return snippet;
488 }
489
490 String s = StringUtil.merge(values);
491
492 try {
493 snippet = LuceneHelperUtil.getSnippet(
494 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
495 query),
496 field, s);
497 }
498 catch (ParseException pe) {
499 _log.error("Query " + query, pe);
500 }
501
502 return snippet;
503 }
504
505 protected Hits toHits(
506 org.apache.lucene.search.IndexSearcher indexSearcher,
507 HitDocs hitDocs, Query query, long startTime,
508 float searchTime, int start, int end)
509 throws IOException, ParseException {
510
511 int length = hitDocs.getTotalHits();
512
513 if ((start == QueryUtil.ALL_POS) && (end == QueryUtil.ALL_POS)) {
514 start = 0;
515 end = length;
516 }
517
518 String[] queryTerms = getQueryTerms(query);
519
520 IndexReader indexReader = indexSearcher.getIndexReader();
521
522 List<String> indexedFieldNames = new ArrayList<String> (
523 indexReader.getFieldNames(IndexReader.FieldOption.INDEXED));
524
525 org.apache.lucene.search.Query luceneQuery =
526 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
527 query);
528
529 int scoredFieldNamesCount = LuceneHelperUtil.countScoredFieldNames(
530 luceneQuery, ArrayUtil.toStringArray(indexedFieldNames.toArray()));
531
532 Hits hits = new HitsImpl();
533
534 if ((start > -1) && (start <= end)) {
535 if (end > length) {
536 end = length;
537 }
538
539 if (start > end) {
540 start = end;
541 }
542
543 int subsetTotal = end - start;
544
545 if (subsetTotal > PropsValues.INDEX_SEARCH_LIMIT) {
546 subsetTotal = PropsValues.INDEX_SEARCH_LIMIT;
547 }
548
549 List<Document> subsetDocs = new ArrayList<Document>(subsetTotal);
550 List<String> subsetSnippets = new ArrayList<String>(subsetTotal);
551 List<Float> subsetScores = new ArrayList<Float>(subsetTotal);
552
553 QueryConfig queryConfig = query.getQueryConfig();
554
555 for (int i = start; i < end; i++) {
556 if (i >= PropsValues.INDEX_SEARCH_LIMIT) {
557 break;
558 }
559
560 int docId = hitDocs.getDocId(i);
561
562 org.apache.lucene.document.Document document =
563 indexSearcher.doc(docId);
564
565 Document subsetDocument = getDocument(document);
566
567 String subsetSnippet = StringPool.BLANK;
568
569 if (queryConfig.isHighlightEnabled()) {
570 subsetSnippet = getSnippet(
571 document, query, Field.CONTENT,
572 queryConfig.getLocale());
573 }
574
575 subsetDocument.addText(Field.SNIPPET, subsetSnippet);
576
577 subsetSnippets.add(subsetSnippet);
578
579 subsetDocs.add(subsetDocument);
580
581 Float subsetScore = hitDocs.getScore(i);
582
583 if (scoredFieldNamesCount > 0) {
584 subsetScore = subsetScore / scoredFieldNamesCount;
585 }
586
587 subsetScores.add(subsetScore);
588
589 if (_log.isDebugEnabled()) {
590 try {
591 Explanation explanation = indexSearcher.explain(
592 luceneQuery, docId);
593
594 _log.debug(explanation.toString());
595 }
596 catch (Exception e) {
597 }
598 }
599 }
600
601 hits.setStart(startTime);
602 hits.setSearchTime(searchTime);
603 hits.setQuery(query);
604 hits.setQueryTerms(queryTerms);
605 hits.setDocs(subsetDocs.toArray(new Document[subsetDocs.size()]));
606 hits.setLength(length);
607 hits.setSnippets(
608 subsetSnippets.toArray(new String[subsetSnippets.size()]));
609 hits.setScores(
610 subsetScores.toArray(new Float[subsetScores.size()]));
611 }
612
613 return hits;
614 }
615
616 private static Log _log = LogFactoryUtil.getLog(
617 LuceneIndexSearcherImpl.class);
618
619 private class HitDocs {
620
621 public HitDocs(BrowseHit[] browseHits) {
622 _browseHits = browseHits;
623 }
624
625 public HitDocs(TopFieldDocs topFieldDocs) {
626 _topFieldDocs = topFieldDocs;
627 }
628
629 public int getDocId(int i) {
630 if (_topFieldDocs != null) {
631 ScoreDoc scoreDoc = _topFieldDocs.scoreDocs[i];
632
633 return scoreDoc.doc;
634 }
635 else if (_browseHits != null) {
636 return _browseHits[i].getDocid();
637 }
638
639 throw new IllegalStateException();
640 }
641
642 public float getScore(int i) {
643 if (_topFieldDocs != null) {
644 ScoreDoc scoreDoc = _topFieldDocs.scoreDocs[i];
645
646 return scoreDoc.score;
647 }
648 else if (_browseHits != null) {
649 return _browseHits[i].getScore();
650 }
651
652 throw new IllegalStateException();
653 }
654
655 public int getTotalHits() {
656 if (_topFieldDocs != null) {
657 return _topFieldDocs.totalHits;
658 }
659 else if (_browseHits != null) {
660 return _browseHits.length;
661 }
662
663 throw new IllegalStateException();
664 }
665
666 private BrowseHit[] _browseHits;
667 private TopFieldDocs _topFieldDocs;
668
669 }
670
671 }