001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.browseengine.bobo.api.BoboBrowser;
018 import com.browseengine.bobo.api.BoboIndexReader;
019 import com.browseengine.bobo.api.BoboSubBrowser;
020 import com.browseengine.bobo.api.Browsable;
021 import com.browseengine.bobo.api.BrowseHit;
022 import com.browseengine.bobo.api.BrowseRequest;
023 import com.browseengine.bobo.api.BrowseResult;
024 import com.browseengine.bobo.api.FacetAccessible;
025 import com.browseengine.bobo.api.FacetSpec;
026 import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
027 import com.browseengine.bobo.facets.FacetHandler;
028 import com.browseengine.bobo.facets.FacetHandler.TermCountSize;
029 import com.browseengine.bobo.facets.impl.MultiValueFacetHandler;
030 import com.browseengine.bobo.facets.impl.RangeFacetHandler;
031 import com.browseengine.bobo.facets.impl.SimpleFacetHandler;
032
033 import com.liferay.portal.kernel.dao.orm.QueryUtil;
034 import com.liferay.portal.kernel.dao.search.SearchPaginationUtil;
035 import com.liferay.portal.kernel.json.JSONArray;
036 import com.liferay.portal.kernel.json.JSONObject;
037 import com.liferay.portal.kernel.log.Log;
038 import com.liferay.portal.kernel.log.LogFactoryUtil;
039 import com.liferay.portal.kernel.search.BaseIndexSearcher;
040 import com.liferay.portal.kernel.search.Document;
041 import com.liferay.portal.kernel.search.DocumentImpl;
042 import com.liferay.portal.kernel.search.Field;
043 import com.liferay.portal.kernel.search.Hits;
044 import com.liferay.portal.kernel.search.HitsImpl;
045 import com.liferay.portal.kernel.search.ParseException;
046 import com.liferay.portal.kernel.search.Query;
047 import com.liferay.portal.kernel.search.QueryConfig;
048 import com.liferay.portal.kernel.search.QueryTranslatorUtil;
049 import com.liferay.portal.kernel.search.SearchContext;
050 import com.liferay.portal.kernel.search.SearchException;
051 import com.liferay.portal.kernel.search.Sort;
052 import com.liferay.portal.kernel.search.facet.Facet;
053 import com.liferay.portal.kernel.search.facet.MultiValueFacet;
054 import com.liferay.portal.kernel.search.facet.RangeFacet;
055 import com.liferay.portal.kernel.search.facet.SimpleFacet;
056 import com.liferay.portal.kernel.search.facet.collector.FacetCollector;
057 import com.liferay.portal.kernel.search.facet.config.FacetConfiguration;
058 import com.liferay.portal.kernel.util.ArrayUtil;
059 import com.liferay.portal.kernel.util.ReflectionUtil;
060 import com.liferay.portal.kernel.util.SetUtil;
061 import com.liferay.portal.kernel.util.StringPool;
062 import com.liferay.portal.kernel.util.StringUtil;
063 import com.liferay.portal.kernel.util.Time;
064 import com.liferay.portal.kernel.util.Validator;
065 import com.liferay.portal.search.BoboFacetCollector;
066 import com.liferay.portal.util.PropsValues;
067
068 import java.io.IOException;
069
070 import java.util.ArrayList;
071 import java.util.Collections;
072 import java.util.HashSet;
073 import java.util.List;
074 import java.util.Locale;
075 import java.util.Map;
076 import java.util.Set;
077
078 import org.apache.lucene.document.FieldSelector;
079 import org.apache.lucene.document.NumericField;
080 import org.apache.lucene.document.SetBasedFieldSelector;
081 import org.apache.lucene.index.IndexReader;
082 import org.apache.lucene.search.BooleanQuery;
083 import org.apache.lucene.search.Explanation;
084 import org.apache.lucene.search.IndexSearcher;
085 import org.apache.lucene.search.SortField;
086 import org.apache.lucene.search.highlight.Formatter;
087 import org.apache.lucene.search.highlight.TokenGroup;
088
089
092 public class LuceneIndexSearcher extends BaseIndexSearcher {
093
094 @Override
095 public Hits search(SearchContext searchContext, Query query)
096 throws SearchException {
097
098 if (_log.isDebugEnabled()) {
099 _log.debug("Query " + query);
100 }
101
102 Hits hits = null;
103
104 IndexSearcher indexSearcher = null;
105 Map<String, Facet> facets = null;
106 BoboBrowser boboBrowser = null;
107 BrowseRequest browseRequest = null;
108
109 try {
110 indexSearcher = LuceneHelperUtil.getIndexSearcher(
111 searchContext.getCompanyId());
112
113 List<FacetHandler<?>> facetHandlers =
114 new ArrayList<FacetHandler<?>>();
115
116 facets = searchContext.getFacets();
117
118 for (Facet facet : facets.values()) {
119 if (facet.isStatic()) {
120 continue;
121 }
122
123 FacetConfiguration facetConfiguration =
124 facet.getFacetConfiguration();
125
126 if (facet instanceof MultiValueFacet) {
127 MultiValueFacetHandler multiValueFacetHandler =
128 new MultiValueFacetHandler(
129 facetConfiguration.getFieldName(),
130 facetConfiguration.getFieldName());
131
132 JSONObject dataJSONObject = facetConfiguration.getData();
133
134 if (dataJSONObject.has("maxTerms")) {
135 multiValueFacetHandler.setMaxItems(
136 dataJSONObject.getInt("maxTerms"));
137 }
138
139 facetHandlers.add(multiValueFacetHandler);
140 }
141 else if (facet instanceof RangeFacet) {
142 List<String> ranges = new ArrayList<String>();
143
144 JSONObject dataJSONObject = facetConfiguration.getData();
145
146 JSONArray rangesJSONArray = dataJSONObject.getJSONArray(
147 "ranges");
148
149 if (rangesJSONArray != null) {
150 for (int i = 0; i < rangesJSONArray.length(); i++) {
151 JSONObject rangeJSONObject =
152 rangesJSONArray.getJSONObject(i);
153
154 ranges.add(rangeJSONObject.getString("range"));
155 }
156 }
157
158 RangeFacetHandler rangeFacetHandler = new RangeFacetHandler(
159 facetConfiguration.getFieldName(),
160 facetConfiguration.getFieldName(), ranges);
161
162 rangeFacetHandler.setTermCountSize(TermCountSize.large);
163
164 facetHandlers.add(rangeFacetHandler);
165 }
166 else if (facet instanceof SimpleFacet) {
167 SimpleFacetHandler simpleFacetHandler =
168 new SimpleFacetHandler(
169 facetConfiguration.getFieldName(),
170 facetConfiguration.getFieldName());
171
172 facetHandlers.add(simpleFacetHandler);
173 }
174 }
175
176 BoboIndexReader boboIndexReader = BoboIndexReader.getInstance(
177 indexSearcher.getIndexReader(), facetHandlers);
178
179 SortField[] sortFields = new SortField[0];
180
181 Sort[] sorts = searchContext.getSorts();
182
183 if (sorts != null) {
184 sortFields = new SortField[sorts.length];
185
186 for (int i = 0; i < sorts.length; i++) {
187 Sort sort = sorts[i];
188
189 if ((sort.getType() == Sort.STRING_TYPE) &&
190 (searchContext.getLocale() != null)) {
191
192 sortFields[i] = new SortField(
193 sort.getFieldName(), searchContext.getLocale(),
194 sort.isReverse());
195 }
196 else {
197 sortFields[i] = new SortField(
198 sort.getFieldName(), sort.getType(),
199 sort.isReverse());
200 }
201 }
202 }
203
204 browseRequest = new BrowseRequest();
205
206 for (Facet facet : facets.values()) {
207 if (facet.isStatic()) {
208 continue;
209 }
210
211 FacetConfiguration facetConfiguration =
212 facet.getFacetConfiguration();
213
214 FacetSpec facetSpec = new FacetSpec();
215
216 facetSpec.setOrderBy(
217 FacetSortSpec.valueOf(facetConfiguration.getOrder()));
218
219 browseRequest.setFacetSpec(facet.getFieldName(), facetSpec);
220 }
221
222 int end = searchContext.getEnd();
223
224 if ((end == QueryUtil.ALL_POS) ||
225 (end > PropsValues.INDEX_SEARCH_LIMIT)) {
226
227 end = PropsValues.INDEX_SEARCH_LIMIT;
228 }
229
230 browseRequest.setCount(end);
231
232 browseRequest.setOffset(0);
233 browseRequest.setQuery(
234 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
235 query));
236 browseRequest.setSort(sortFields);
237
238 boboBrowser = new BoboBrowser(boboIndexReader);
239
240 long startTime = System.currentTimeMillis();
241
242 BrowseResult browseResult = boboBrowser.browse(browseRequest);
243
244 long endTime = System.currentTimeMillis();
245
246 float searchTime = (float)(endTime - startTime) / Time.SECOND;
247
248 hits = toHits(
249 indexSearcher, browseResult, query, startTime, searchTime,
250 searchContext.getStart(), searchContext.getEnd());
251
252 Map<String, FacetAccessible> facetMap = browseResult.getFacetMap();
253
254 for (Map.Entry<String, FacetAccessible> entry :
255 facetMap.entrySet()) {
256
257 Facet facet = facets.get(entry.getKey());
258
259 FacetAccessible facetAccessible = entry.getValue();
260
261 FacetCollector facetCollector = new BoboFacetCollector(
262 entry.getKey(), facetAccessible);
263
264 facet.setFacetCollector(facetCollector);
265 }
266 }
267 catch (BooleanQuery.TooManyClauses tmc) {
268 int maxClauseCount = BooleanQuery.getMaxClauseCount();
269
270 BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
271
272 try {
273 long startTime = System.currentTimeMillis();
274
275 BrowseResult browseResult = boboBrowser.browse(browseRequest);
276
277 long endTime = System.currentTimeMillis();
278
279 float searchTime = (float)(endTime - startTime) / Time.SECOND;
280
281 hits = toHits(
282 indexSearcher, browseResult, query, startTime, searchTime,
283 searchContext.getStart(), searchContext.getEnd());
284
285 Map<String, FacetAccessible> facetMap =
286 browseResult.getFacetMap();
287
288 for (Map.Entry<String, FacetAccessible> entry :
289 facetMap.entrySet()) {
290
291 Facet facet = facets.get(entry.getKey());
292
293 FacetAccessible facetAccessible = entry.getValue();
294
295 FacetCollector facetCollector = new BoboFacetCollector(
296 entry.getKey(), facetAccessible);
297
298 facet.setFacetCollector(facetCollector);
299 }
300 }
301 catch (Exception e) {
302 throw new SearchException(e);
303 }
304 finally {
305 BooleanQuery.setMaxClauseCount(maxClauseCount);
306 }
307 }
308 catch (ParseException pe) {
309 _log.error("Query " + query, pe);
310
311 return new HitsImpl();
312 }
313 catch (Exception e) {
314 throw new SearchException(e);
315 }
316 finally {
317 cleanUp(boboBrowser);
318
319 try {
320 LuceneHelperUtil.releaseIndexSearcher(
321 searchContext.getCompanyId(), indexSearcher);
322 }
323 catch (IOException ioe) {
324 _log.error("Unable to release searcher", ioe);
325 }
326 }
327
328 if (_log.isDebugEnabled()) {
329 _log.debug(
330 "Search found " + hits.getLength() + " results in " +
331 hits.getSearchTime() + "ms");
332 }
333
334 return hits;
335 }
336
337 protected void cleanUp(BoboBrowser boboBrowser) {
338 if (boboBrowser == null) {
339 return;
340 }
341
342 try {
343 boboBrowser.close();
344 }
345 catch (IOException ioe) {
346 _log.error(ioe, ioe);
347 }
348
349 Browsable[] browsables = boboBrowser.getSubBrowsers();
350
351 for (Browsable browsable : browsables) {
352 if (!(browsable instanceof BoboSubBrowser)) {
353 continue;
354 }
355
356 BoboSubBrowser boboSubBrowser = (BoboSubBrowser)browsable;
357
358 BoboIndexReader boboIndexReader = boboSubBrowser.getIndexReader();
359
360 try {
361 ThreadLocal<?> threadLocal =
362 (ThreadLocal<?>)_runtimeFacetDataMapField.get(
363 boboIndexReader);
364
365 threadLocal.remove();
366
367 _runtimeFacetDataMapField.set(boboIndexReader, null);
368 }
369 catch (Exception e) {
370 _log.error(
371 "Unable to clean up BoboIndexReader#_runtimeFacetDataMap",
372 e);
373 }
374
375 try {
376 ThreadLocal<?> threadLocal =
377 (ThreadLocal<?>)_runtimeFacetHandlerMapField.get(
378 boboIndexReader);
379
380 threadLocal.remove();
381
382 _runtimeFacetHandlerMapField.set(boboIndexReader, null);
383 }
384 catch (Exception e) {
385 _log.error(
386 "Unable to clean up BoboIndexReader#" +
387 "_runtimeFacetHandlerMap",
388 e);
389 }
390 }
391 }
392
393 protected DocumentImpl getDocument(
394 org.apache.lucene.document.Document oldDocument) {
395
396 DocumentImpl newDocument = new DocumentImpl();
397
398 List<org.apache.lucene.document.Fieldable> oldFieldables =
399 oldDocument.getFields();
400
401 for (org.apache.lucene.document.Fieldable oldFieldable :
402 oldFieldables) {
403
404 Field newField = null;
405
406 String[] values = oldDocument.getValues(oldFieldable.name());
407
408 if ((values != null) && (values.length > 1)) {
409 newField = new Field(oldFieldable.name(), values);
410 }
411 else {
412 newField = new Field(
413 oldFieldable.name(), oldFieldable.stringValue());
414 }
415
416 newField.setNumeric(oldFieldable instanceof NumericField);
417 newField.setTokenized(oldFieldable.isTokenized());
418
419 newDocument.add(newField);
420 }
421
422 return newDocument;
423 }
424
425 protected Set<String> getQueryTerms(Query query) {
426 Set<String> queryTerms = new HashSet<String>();
427
428 try {
429 queryTerms = LuceneHelperUtil.getQueryTerms(
430 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
431 query));
432 }
433 catch (ParseException pe) {
434 _log.error("Query " + query, pe);
435 }
436
437 return queryTerms;
438 }
439
440 protected String getSnippet(
441 org.apache.lucene.document.Document doc, Query query, String field,
442 Locale locale, Document hitDoc, Set<String> matchingTerms)
443 throws IOException {
444
445 String snippetField = DocumentImpl.getLocalizedName(locale, field);
446 String snippet = null;
447
448 try {
449 org.apache.lucene.search.Query luceneQuery =
450 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
451 query);
452
453 String[] values = doc.getValues(snippetField);
454
455 TermCollectingFormatter termCollectingFormatter =
456 new TermCollectingFormatter();
457
458 if (ArrayUtil.isNotEmpty(values)) {
459 snippet = LuceneHelperUtil.getSnippet(
460 luceneQuery, snippetField, StringUtil.merge(values),
461 termCollectingFormatter);
462 }
463
464 if (ArrayUtil.isEmpty(values) || Validator.isNull(snippet)) {
465 snippetField = field;
466
467 values = doc.getValues(snippetField);
468
469 if (ArrayUtil.isEmpty(values)) {
470 return StringPool.BLANK;
471 }
472
473 snippet = LuceneHelperUtil.getSnippet(
474 luceneQuery, field, StringUtil.merge(values),
475 termCollectingFormatter);
476 }
477
478 if (Validator.isNull(snippet)) {
479 return StringPool.BLANK;
480 }
481
482 matchingTerms.addAll(termCollectingFormatter.getTerms());
483 }
484 catch (ParseException pe) {
485 _log.error("Query " + query, pe);
486 }
487
488 hitDoc.addText(
489 Field.SNIPPET.concat(StringPool.UNDERLINE).concat(snippetField),
490 snippet);
491
492 return snippet;
493 }
494
495 protected Hits toHits(
496 IndexSearcher indexSearcher, BrowseResult browseResult, Query query,
497 long startTime, float searchTime, int start, int end)
498 throws IOException, ParseException {
499
500 int total = browseResult.getNumHits();
501
502 BrowseHit[] browseHits = browseResult.getHits();
503
504 if ((start == QueryUtil.ALL_POS) && (end == QueryUtil.ALL_POS)) {
505 start = 0;
506 end = total;
507 }
508
509 int[] startAndEnd = SearchPaginationUtil.calculateStartAndEnd(
510 start, end, total);
511
512 start = startAndEnd[0];
513 end = startAndEnd[1];
514
515 Set<String> queryTerms = new HashSet<String>();
516
517 IndexReader indexReader = indexSearcher.getIndexReader();
518
519 List<String> indexedFieldNames = new ArrayList<String> (
520 indexReader.getFieldNames(IndexReader.FieldOption.INDEXED));
521
522 org.apache.lucene.search.Query luceneQuery =
523 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
524 query);
525
526 int scoredFieldNamesCount = LuceneHelperUtil.countScoredFieldNames(
527 luceneQuery, ArrayUtil.toStringArray(indexedFieldNames.toArray()));
528
529 Hits hits = new HitsImpl();
530
531 if ((start < 0) || (start > end)) {
532 return hits;
533 }
534
535 int subsetTotal = end - start;
536
537 if (subsetTotal > PropsValues.INDEX_SEARCH_LIMIT) {
538 subsetTotal = PropsValues.INDEX_SEARCH_LIMIT;
539 }
540
541 List<Document> subsetDocs = new ArrayList<Document>(subsetTotal);
542 List<Float> subsetScores = new ArrayList<Float>(subsetTotal);
543
544 FieldSelector fieldSelector = null;
545
546 QueryConfig queryConfig = query.getQueryConfig();
547
548 String[] selectedFieldNames = queryConfig.getSelectedFieldNames();
549
550 if (ArrayUtil.isNotEmpty(selectedFieldNames) &&
551 !selectedFieldNames[0].equals(Field.ANY)) {
552
553 fieldSelector = new SetBasedFieldSelector(
554 SetUtil.fromArray(selectedFieldNames),
555 Collections.<String>emptySet());
556 }
557
558 for (int i = start; i < start + subsetTotal; i++) {
559 int docId = browseHits[i].getDocid();
560
561 org.apache.lucene.document.Document document = indexSearcher.doc(
562 docId, fieldSelector);
563
564 Document subsetDocument = getDocument(document);
565
566 String[] highlightFieldNames = queryConfig.getHighlightFieldNames();
567
568 for (String highlightFieldName : highlightFieldNames) {
569 getSnippet(
570 document, query, highlightFieldName,
571 queryConfig.getLocale(), subsetDocument, queryTerms);
572 }
573
574 subsetDocs.add(subsetDocument);
575
576 Float subsetScore = browseHits[i].getScore();
577
578 if (scoredFieldNamesCount > 0) {
579 subsetScore = subsetScore / scoredFieldNamesCount;
580 }
581
582 subsetScores.add(subsetScore);
583
584 if (_log.isDebugEnabled()) {
585 try {
586 Explanation explanation = indexSearcher.explain(
587 luceneQuery, docId);
588
589 _log.debug(explanation.toString());
590 }
591 catch (Exception e) {
592 }
593 }
594 }
595
596 if (!queryConfig.isHighlightEnabled()) {
597 queryTerms = getQueryTerms(query);
598 }
599
600 hits.setDocs(subsetDocs.toArray(new Document[subsetDocs.size()]));
601 hits.setLength(total);
602 hits.setQuery(query);
603 hits.setQueryTerms(queryTerms.toArray(new String[queryTerms.size()]));
604 hits.setScores(ArrayUtil.toFloatArray(subsetScores));
605 hits.setSearchTime(searchTime);
606 hits.setStart(startTime);
607
608 return hits;
609 }
610
611 private static Log _log = LogFactoryUtil.getLog(LuceneIndexSearcher.class);
612
613 private static java.lang.reflect.Field _runtimeFacetDataMapField;
614 private static java.lang.reflect.Field _runtimeFacetHandlerMapField;
615
616 static {
617 try {
618 _runtimeFacetDataMapField = ReflectionUtil.getDeclaredField(
619 BoboIndexReader.class, "_runtimeFacetDataMap");
620 _runtimeFacetHandlerMapField = ReflectionUtil.getDeclaredField(
621 BoboIndexReader.class, "_runtimeFacetHandlerMap");
622 }
623 catch (Exception e) {
624 throw new ExceptionInInitializerError(e);
625 }
626 }
627
628 private class TermCollectingFormatter implements Formatter {
629
630 public Set<String> getTerms() {
631 return _terms;
632 }
633
634 @Override
635 public String highlightTerm(
636 String originalText, TokenGroup tokenGroup) {
637
638 if (tokenGroup.getTotalScore() > 0) {
639 _terms.add(originalText);
640 }
641
642 return originalText;
643 }
644
645 private Set<String> _terms = new HashSet<String>();
646
647 }
648
649 }