001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.browseengine.bobo.api.BoboBrowser;
018 import com.browseengine.bobo.api.BoboIndexReader;
019 import com.browseengine.bobo.api.Browsable;
020 import com.browseengine.bobo.api.BrowseHit;
021 import com.browseengine.bobo.api.BrowseRequest;
022 import com.browseengine.bobo.api.BrowseResult;
023 import com.browseengine.bobo.api.FacetAccessible;
024 import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
025 import com.browseengine.bobo.api.FacetSpec;
026 import com.browseengine.bobo.facets.FacetHandler.TermCountSize;
027 import com.browseengine.bobo.facets.FacetHandler;
028 import com.browseengine.bobo.facets.impl.MultiValueFacetHandler;
029 import com.browseengine.bobo.facets.impl.RangeFacetHandler;
030 import com.browseengine.bobo.facets.impl.SimpleFacetHandler;
031
032 import com.liferay.portal.kernel.dao.orm.QueryUtil;
033 import com.liferay.portal.kernel.json.JSONArray;
034 import com.liferay.portal.kernel.json.JSONObject;
035 import com.liferay.portal.kernel.log.Log;
036 import com.liferay.portal.kernel.log.LogFactoryUtil;
037 import com.liferay.portal.kernel.search.Document;
038 import com.liferay.portal.kernel.search.DocumentImpl;
039 import com.liferay.portal.kernel.search.Field;
040 import com.liferay.portal.kernel.search.Hits;
041 import com.liferay.portal.kernel.search.HitsImpl;
042 import com.liferay.portal.kernel.search.IndexSearcher;
043 import com.liferay.portal.kernel.search.ParseException;
044 import com.liferay.portal.kernel.search.Query;
045 import com.liferay.portal.kernel.search.QueryConfig;
046 import com.liferay.portal.kernel.search.QueryTranslatorUtil;
047 import com.liferay.portal.kernel.search.SearchContext;
048 import com.liferay.portal.kernel.search.SearchException;
049 import com.liferay.portal.kernel.search.Sort;
050 import com.liferay.portal.kernel.search.facet.Facet;
051 import com.liferay.portal.kernel.search.facet.MultiValueFacet;
052 import com.liferay.portal.kernel.search.facet.RangeFacet;
053 import com.liferay.portal.kernel.search.facet.SimpleFacet;
054 import com.liferay.portal.kernel.search.facet.collector.FacetCollector;
055 import com.liferay.portal.kernel.search.facet.config.FacetConfiguration;
056 import com.liferay.portal.kernel.util.ArrayUtil;
057 import com.liferay.portal.kernel.util.StringPool;
058 import com.liferay.portal.kernel.util.StringUtil;
059 import com.liferay.portal.kernel.util.Time;
060 import com.liferay.portal.kernel.util.Validator;
061 import com.liferay.portal.search.BoboFacetCollector;
062 import com.liferay.portal.util.PropsValues;
063
064 import java.io.IOException;
065
066 import java.util.ArrayList;
067 import java.util.List;
068 import java.util.Locale;
069 import java.util.Map;
070
071 import org.apache.lucene.document.NumericField;
072 import org.apache.lucene.index.IndexReader;
073 import org.apache.lucene.search.BooleanQuery;
074 import org.apache.lucene.search.Explanation;
075 import org.apache.lucene.search.ScoreDoc;
076 import org.apache.lucene.search.SortField;
077 import org.apache.lucene.search.TopFieldDocs;
078
079
082 public class LuceneIndexSearcherImpl implements IndexSearcher {
083
084 public Hits search(SearchContext searchContext, Query query)
085 throws SearchException {
086
087 if (_log.isDebugEnabled()) {
088 _log.debug("Query " + query);
089 }
090
091 Hits hits = null;
092
093 org.apache.lucene.search.IndexSearcher indexSearcher = null;
094 Map<String, Facet> facets = null;
095 BrowseRequest browseRequest = null;
096 Browsable browsable = null;
097
098 try {
099 indexSearcher = LuceneHelperUtil.getSearcher(
100 searchContext.getCompanyId(), true);
101
102 List<FacetHandler<?>> facetHandlers =
103 new ArrayList<FacetHandler<?>>();
104
105 facets = searchContext.getFacets();
106
107 for (Facet facet : facets.values()) {
108 if (facet.isStatic()) {
109 continue;
110 }
111
112 FacetConfiguration facetConfiguration =
113 facet.getFacetConfiguration();
114
115 if (facet instanceof MultiValueFacet) {
116 MultiValueFacetHandler multiValueFacetHandler =
117 new MultiValueFacetHandler(
118 facetConfiguration.getFieldName(),
119 facetConfiguration.getFieldName());
120
121 JSONObject dataJSONObject = facetConfiguration.getData();
122
123 if (dataJSONObject.has("maxTerms")) {
124 multiValueFacetHandler.setMaxItems(
125 dataJSONObject.getInt("maxTerms"));
126 }
127
128 facetHandlers.add(multiValueFacetHandler);
129 }
130 else if (facet instanceof RangeFacet) {
131 List<String> ranges = new ArrayList<String>();
132
133 JSONObject dataJSONObject = facetConfiguration.getData();
134
135 JSONArray rangesJSONArray = dataJSONObject.getJSONArray(
136 "ranges");
137
138 if (rangesJSONArray != null) {
139 for (int i = 0; i < rangesJSONArray.length(); i++) {
140 JSONObject rangeJSONObject =
141 rangesJSONArray.getJSONObject(i);
142
143 ranges.add(rangeJSONObject.getString("range"));
144 }
145 }
146
147 RangeFacetHandler rangeFacetHandler = new RangeFacetHandler(
148 facetConfiguration.getFieldName(),
149 facetConfiguration.getFieldName(), ranges);
150
151 rangeFacetHandler.setTermCountSize(TermCountSize.large);
152
153 facetHandlers.add(rangeFacetHandler);
154 }
155 else if (facet instanceof SimpleFacet) {
156 SimpleFacetHandler simpleFacetHandler =
157 new SimpleFacetHandler(
158 facetConfiguration.getFieldName(),
159 facetConfiguration.getFieldName());
160
161 facetHandlers.add(simpleFacetHandler);
162 }
163 }
164
165 BoboIndexReader boboIndexReader = BoboIndexReader.getInstance(
166 indexSearcher.getIndexReader(), facetHandlers);
167
168 SortField[] sortFields = new SortField[0];
169
170 Sort[] sorts = searchContext.getSorts();
171
172 if (sorts != null) {
173 sortFields = new SortField[sorts.length];
174
175 for (int i = 0; i < sorts.length; i++) {
176 Sort sort = sorts[i];
177
178 sortFields[i] = new SortField(
179 sort.getFieldName(), sort.getType(), sort.isReverse());
180 }
181 }
182
183 browseRequest = new BrowseRequest();
184
185 for (Facet facet : facets.values()) {
186 if (facet.isStatic()) {
187 continue;
188 }
189
190 FacetConfiguration facetConfiguration =
191 facet.getFacetConfiguration();
192
193 FacetSpec facetSpec = new FacetSpec();
194
195 facetSpec.setOrderBy(
196 FacetSortSpec.valueOf(facetConfiguration.getOrder()));
197
198 browseRequest.setFacetSpec(facet.getFieldName(), facetSpec);
199 }
200
201 browseRequest.setCount(PropsValues.INDEX_SEARCH_LIMIT);
202 browseRequest.setOffset(0);
203 browseRequest.setQuery(
204 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
205 query));
206 browseRequest.setSort(sortFields);
207
208 browsable = new BoboBrowser(boboIndexReader);
209
210 long startTime = System.currentTimeMillis();
211
212 BrowseResult browseResult = browsable.browse(browseRequest);
213
214 BrowseHit[] browseHits = browseResult.getHits();
215
216 long endTime = System.currentTimeMillis();
217
218 float searchTime = (float)(endTime - startTime) / Time.SECOND;
219
220 hits = toHits(
221 indexSearcher, new HitDocs(browseHits), query, startTime,
222 searchTime, searchContext.getStart(), searchContext.getEnd());
223
224 Map<String, FacetAccessible> facetMap = browseResult.getFacetMap();
225
226 for (Map.Entry<String, FacetAccessible> entry :
227 facetMap.entrySet()) {
228
229 Facet facet = facets.get(entry.getKey());
230
231 FacetAccessible facetAccessible = entry.getValue();
232
233 FacetCollector facetCollector = new BoboFacetCollector(
234 entry.getKey(), facetAccessible);
235
236 facet.setFacetCollector(facetCollector);
237 }
238 }
239 catch (BooleanQuery.TooManyClauses tmc) {
240 int maxClauseCount = BooleanQuery.getMaxClauseCount();
241
242 BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
243
244 try {
245 long startTime = System.currentTimeMillis();
246
247 BrowseResult result = browsable.browse(browseRequest);
248
249 BrowseHit[] browseHits = result.getHits();
250
251 long endTime = System.currentTimeMillis();
252
253 float searchTime = (float)(endTime - startTime) / Time.SECOND;
254
255 hits = toHits(
256 indexSearcher, new HitDocs(browseHits), query, startTime,
257 searchTime, searchContext.getStart(),
258 searchContext.getEnd());
259
260 Map<String, FacetAccessible> facetMap = result.getFacetMap();
261
262 for (Map.Entry<String, FacetAccessible> entry :
263 facetMap.entrySet()) {
264
265 Facet facet = facets.get(entry.getKey());
266
267 FacetAccessible facetAccessible = entry.getValue();
268
269 FacetCollector facetCollector = new BoboFacetCollector(
270 entry.getKey(), facetAccessible);
271
272 facet.setFacetCollector(facetCollector);
273 }
274 }
275 catch (Exception e) {
276 throw new SearchException(e);
277 }
278 finally {
279 BooleanQuery.setMaxClauseCount(maxClauseCount);
280 }
281 }
282 catch (ParseException pe) {
283 _log.error("Query " + query, pe);
284
285 return new HitsImpl();
286 }
287 catch (Exception e) {
288 throw new SearchException(e);
289 }
290 finally {
291 if (browsable != null) {
292 try {
293 browsable.close();
294 }
295 catch (IOException ioe) {
296 _log.error(ioe, ioe);
297 }
298 }
299
300 if (indexSearcher != null) {
301 try {
302 indexSearcher.close();
303 }
304 catch (IOException ioe) {
305 _log.error(ioe, ioe);
306 }
307 }
308 }
309
310 if (_log.isDebugEnabled()) {
311 _log.debug(
312 "Search found " + hits.getLength() + " results in " +
313 hits.getSearchTime() + "ms");
314 }
315
316 return hits;
317 }
318
319 public Hits search(
320 String searchEngineId, long companyId, Query query, Sort[] sorts,
321 int start, int end)
322 throws SearchException {
323
324 if (_log.isDebugEnabled()) {
325 _log.debug("Query " + query);
326 }
327
328 Hits hits = null;
329
330 org.apache.lucene.search.IndexSearcher indexSearcher = null;
331 org.apache.lucene.search.Sort luceneSort = null;
332
333 try {
334 indexSearcher = LuceneHelperUtil.getSearcher(companyId, true);
335
336 if (sorts != null) {
337 SortField[] sortFields = new SortField[sorts.length];
338
339 for (int i = 0; i < sorts.length; i++) {
340 Sort sort = sorts[i];
341
342 sortFields[i] = new SortField(
343 sort.getFieldName(), sort.getType(), sort.isReverse());
344 }
345
346 luceneSort = new org.apache.lucene.search.Sort(sortFields);
347 }
348 else {
349 luceneSort = new org.apache.lucene.search.Sort();
350 }
351
352 long startTime = System.currentTimeMillis();
353
354 TopFieldDocs topFieldDocs = indexSearcher.search(
355 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
356 query),
357 null, PropsValues.INDEX_SEARCH_LIMIT, luceneSort);
358
359 long endTime = System.currentTimeMillis();
360
361 float searchTime = (float)(endTime - startTime) / Time.SECOND;
362
363 hits = toHits(
364 indexSearcher, new HitDocs(topFieldDocs), query, startTime,
365 searchTime, start, end);
366 }
367 catch (BooleanQuery.TooManyClauses tmc) {
368 int maxClauseCount = BooleanQuery.getMaxClauseCount();
369
370 BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
371
372 try {
373 long startTime = System.currentTimeMillis();
374
375 TopFieldDocs topFieldDocs = indexSearcher.search(
376 (org.apache.lucene.search.Query)
377 QueryTranslatorUtil.translate(query),
378 null, PropsValues.INDEX_SEARCH_LIMIT, luceneSort);
379
380 long endTime = System.currentTimeMillis();
381
382 float searchTime = (float)(endTime - startTime) / Time.SECOND;
383
384 hits = toHits(
385 indexSearcher, new HitDocs(topFieldDocs), query, startTime,
386 searchTime, start, end);
387 }
388 catch (Exception e) {
389 throw new SearchException(e);
390 }
391 finally {
392 BooleanQuery.setMaxClauseCount(maxClauseCount);
393 }
394 }
395 catch (ParseException pe) {
396 _log.error("Query " + query, pe);
397
398 return new HitsImpl();
399 }
400 catch (Exception e) {
401 throw new SearchException(e);
402 }
403 finally {
404 if (indexSearcher != null) {
405 try {
406 indexSearcher.close();
407 }
408 catch (IOException ioe) {
409 _log.error(ioe, ioe);
410 }
411 }
412 }
413
414 if (_log.isDebugEnabled()) {
415 _log.debug(
416 "Search found " + hits.getLength() + " results in " +
417 hits.getSearchTime() + "ms");
418 }
419
420 return hits;
421 }
422
423 protected DocumentImpl getDocument(
424 org.apache.lucene.document.Document oldDocument) {
425
426 DocumentImpl newDocument = new DocumentImpl();
427
428 List<org.apache.lucene.document.Fieldable> oldFieldables =
429 oldDocument.getFields();
430
431 for (org.apache.lucene.document.Fieldable oldFieldable :
432 oldFieldables) {
433
434 Field newField = null;
435
436 String[] values = oldDocument.getValues(oldFieldable.name());
437
438 if ((values != null) && (values.length > 1)) {
439 newField = new Field(oldFieldable.name(), values);
440 }
441 else {
442 newField = new Field(
443 oldFieldable.name(), oldFieldable.stringValue());
444 }
445
446 newField.setNumeric(oldFieldable instanceof NumericField);
447 newField.setTokenized(oldFieldable.isTokenized());
448
449 newDocument.add(newField);
450 }
451
452 return newDocument;
453 }
454
455 protected String[] getQueryTerms(Query query) {
456 String[] queryTerms = new String[0];
457
458 try {
459 queryTerms = LuceneHelperUtil.getQueryTerms(
460 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
461 query));
462 }
463 catch (ParseException pe) {
464 _log.error("Query " + query, pe);
465 }
466
467 return queryTerms;
468 }
469
470 protected String getSnippet(
471 org.apache.lucene.document.Document doc, Query query, String field,
472 Locale locale)
473 throws IOException {
474
475 String localizedName = DocumentImpl.getLocalizedName(locale, field);
476
477 String[] values = doc.getValues(localizedName);
478
479 if ((values == null) || (values.length == 0)) {
480 values = doc.getValues(field);
481 }
482
483 String snippet = null;
484
485 if (Validator.isNull(values)) {
486 return snippet;
487 }
488
489 String s = StringUtil.merge(values);
490
491 try {
492 snippet = LuceneHelperUtil.getSnippet(
493 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
494 query),
495 field, s);
496 }
497 catch (ParseException pe) {
498 _log.error("Query " + query, pe);
499 }
500
501 return snippet;
502 }
503
504 protected Hits toHits(
505 org.apache.lucene.search.IndexSearcher indexSearcher,
506 HitDocs hitDocs, Query query, long startTime, float searchTime,
507 int start, int end)
508 throws IOException, ParseException {
509
510 int length = hitDocs.getTotalHits();
511
512 if ((start == QueryUtil.ALL_POS) && (end == QueryUtil.ALL_POS)) {
513 start = 0;
514 end = length;
515 }
516
517 String[] queryTerms = getQueryTerms(query);
518
519 IndexReader indexReader = indexSearcher.getIndexReader();
520
521 List<String> indexedFieldNames = new ArrayList<String> (
522 indexReader.getFieldNames(IndexReader.FieldOption.INDEXED));
523
524 org.apache.lucene.search.Query luceneQuery =
525 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
526 query);
527
528 int scoredFieldNamesCount = LuceneHelperUtil.countScoredFieldNames(
529 luceneQuery, ArrayUtil.toStringArray(indexedFieldNames.toArray()));
530
531 Hits hits = new HitsImpl();
532
533 if ((start > -1) && (start <= end)) {
534 if (end > length) {
535 end = length;
536 }
537
538 if (start > end) {
539 start = end;
540 }
541
542 int subsetTotal = end - start;
543
544 if (subsetTotal > PropsValues.INDEX_SEARCH_LIMIT) {
545 subsetTotal = PropsValues.INDEX_SEARCH_LIMIT;
546 }
547
548 List<Document> subsetDocs = new ArrayList<Document>(subsetTotal);
549 List<String> subsetSnippets = new ArrayList<String>(subsetTotal);
550 List<Float> subsetScores = new ArrayList<Float>(subsetTotal);
551
552 QueryConfig queryConfig = query.getQueryConfig();
553
554 for (int i = start; i < end; i++) {
555 if (i >= PropsValues.INDEX_SEARCH_LIMIT) {
556 break;
557 }
558
559 int docId = hitDocs.getDocId(i);
560
561 org.apache.lucene.document.Document document =
562 indexSearcher.doc(docId);
563
564 Document subsetDocument = getDocument(document);
565
566 String subsetSnippet = StringPool.BLANK;
567
568 if (queryConfig.isHighlightEnabled()) {
569 subsetSnippet = getSnippet(
570 document, query, Field.CONTENT,
571 queryConfig.getLocale());
572 }
573
574 subsetDocument.addText(Field.SNIPPET, subsetSnippet);
575
576 subsetSnippets.add(subsetSnippet);
577
578 subsetDocs.add(subsetDocument);
579
580 Float subsetScore = hitDocs.getScore(i);
581
582 if (scoredFieldNamesCount > 0) {
583 subsetScore = subsetScore / scoredFieldNamesCount;
584 }
585
586 subsetScores.add(subsetScore);
587
588 if (_log.isDebugEnabled()) {
589 try {
590 Explanation explanation = indexSearcher.explain(
591 luceneQuery, docId);
592
593 _log.debug(explanation.toString());
594 }
595 catch (Exception e) {
596 }
597 }
598 }
599
600 hits.setStart(startTime);
601 hits.setSearchTime(searchTime);
602 hits.setQuery(query);
603 hits.setQueryTerms(queryTerms);
604 hits.setDocs(subsetDocs.toArray(new Document[subsetDocs.size()]));
605 hits.setLength(length);
606 hits.setSnippets(
607 subsetSnippets.toArray(new String[subsetSnippets.size()]));
608 hits.setScores(
609 subsetScores.toArray(new Float[subsetScores.size()]));
610 }
611
612 return hits;
613 }
614
615 private static Log _log = LogFactoryUtil.getLog(
616 LuceneIndexSearcherImpl.class);
617
618 private class HitDocs {
619
620 public HitDocs(BrowseHit[] browseHits) {
621 _browseHits = browseHits;
622 }
623
624 public HitDocs(TopFieldDocs topFieldDocs) {
625 _topFieldDocs = topFieldDocs;
626 }
627
628 public int getDocId(int i) {
629 if (_topFieldDocs != null) {
630 ScoreDoc scoreDoc = _topFieldDocs.scoreDocs[i];
631
632 return scoreDoc.doc;
633 }
634 else if (_browseHits != null) {
635 return _browseHits[i].getDocid();
636 }
637
638 throw new IllegalStateException();
639 }
640
641 public float getScore(int i) {
642 if (_topFieldDocs != null) {
643 ScoreDoc scoreDoc = _topFieldDocs.scoreDocs[i];
644
645 return scoreDoc.score;
646 }
647 else if (_browseHits != null) {
648 return _browseHits[i].getScore();
649 }
650
651 throw new IllegalStateException();
652 }
653
654 public int getTotalHits() {
655 if (_topFieldDocs != null) {
656 return _topFieldDocs.totalHits;
657 }
658 else if (_browseHits != null) {
659 return _browseHits.length;
660 }
661
662 throw new IllegalStateException();
663 }
664
665 private BrowseHit[] _browseHits;
666 private TopFieldDocs _topFieldDocs;
667
668 }
669
670 }