001
014
015 package com.liferay.portal.search.lucene;
016
017 import com.browseengine.bobo.api.BoboBrowser;
018 import com.browseengine.bobo.api.BoboIndexReader;
019 import com.browseengine.bobo.api.BoboSubBrowser;
020 import com.browseengine.bobo.api.Browsable;
021 import com.browseengine.bobo.api.BrowseHit;
022 import com.browseengine.bobo.api.BrowseRequest;
023 import com.browseengine.bobo.api.BrowseResult;
024 import com.browseengine.bobo.api.FacetAccessible;
025 import com.browseengine.bobo.api.FacetSpec;
026 import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
027 import com.browseengine.bobo.facets.FacetHandler;
028 import com.browseengine.bobo.facets.FacetHandler.TermCountSize;
029 import com.browseengine.bobo.facets.impl.MultiValueFacetHandler;
030 import com.browseengine.bobo.facets.impl.RangeFacetHandler;
031 import com.browseengine.bobo.facets.impl.SimpleFacetHandler;
032
033 import com.liferay.portal.kernel.dao.orm.QueryUtil;
034 import com.liferay.portal.kernel.dao.search.SearchPaginationUtil;
035 import com.liferay.portal.kernel.json.JSONArray;
036 import com.liferay.portal.kernel.json.JSONObject;
037 import com.liferay.portal.kernel.log.Log;
038 import com.liferay.portal.kernel.log.LogFactoryUtil;
039 import com.liferay.portal.kernel.search.BaseIndexSearcher;
040 import com.liferay.portal.kernel.search.Document;
041 import com.liferay.portal.kernel.search.DocumentImpl;
042 import com.liferay.portal.kernel.search.Field;
043 import com.liferay.portal.kernel.search.Hits;
044 import com.liferay.portal.kernel.search.HitsImpl;
045 import com.liferay.portal.kernel.search.ParseException;
046 import com.liferay.portal.kernel.search.Query;
047 import com.liferay.portal.kernel.search.QueryConfig;
048 import com.liferay.portal.kernel.search.QueryTranslatorUtil;
049 import com.liferay.portal.kernel.search.SearchContext;
050 import com.liferay.portal.kernel.search.SearchException;
051 import com.liferay.portal.kernel.search.Sort;
052 import com.liferay.portal.kernel.search.facet.Facet;
053 import com.liferay.portal.kernel.search.facet.MultiValueFacet;
054 import com.liferay.portal.kernel.search.facet.RangeFacet;
055 import com.liferay.portal.kernel.search.facet.SimpleFacet;
056 import com.liferay.portal.kernel.search.facet.collector.FacetCollector;
057 import com.liferay.portal.kernel.search.facet.config.FacetConfiguration;
058 import com.liferay.portal.kernel.util.ArrayUtil;
059 import com.liferay.portal.kernel.util.ReflectionUtil;
060 import com.liferay.portal.kernel.util.SetUtil;
061 import com.liferay.portal.kernel.util.StringPool;
062 import com.liferay.portal.kernel.util.StringUtil;
063 import com.liferay.portal.kernel.util.Time;
064 import com.liferay.portal.kernel.util.Validator;
065 import com.liferay.portal.search.BoboFacetCollector;
066 import com.liferay.portal.util.PropsValues;
067
068 import java.io.IOException;
069
070 import java.util.ArrayList;
071 import java.util.Collections;
072 import java.util.HashSet;
073 import java.util.List;
074 import java.util.Locale;
075 import java.util.Map;
076 import java.util.Set;
077
078 import org.apache.lucene.document.FieldSelector;
079 import org.apache.lucene.document.NumericField;
080 import org.apache.lucene.document.SetBasedFieldSelector;
081 import org.apache.lucene.index.IndexReader;
082 import org.apache.lucene.search.BooleanQuery;
083 import org.apache.lucene.search.Explanation;
084 import org.apache.lucene.search.IndexSearcher;
085 import org.apache.lucene.search.SortField;
086 import org.apache.lucene.search.highlight.Formatter;
087 import org.apache.lucene.search.highlight.TokenGroup;
088
089
092 public class LuceneIndexSearcher extends BaseIndexSearcher {
093
094 @Override
095 public Hits search(SearchContext searchContext, Query query)
096 throws SearchException {
097
098 if (_log.isDebugEnabled()) {
099 _log.debug("Query " + query);
100 }
101
102 Hits hits = null;
103
104 IndexSearcher indexSearcher = null;
105 Map<String, Facet> facets = null;
106 BoboBrowser boboBrowser = null;
107 BrowseRequest browseRequest = null;
108
109 try {
110 indexSearcher = LuceneHelperUtil.getIndexSearcher(
111 searchContext.getCompanyId());
112
113 List<FacetHandler<?>> facetHandlers =
114 new ArrayList<FacetHandler<?>>();
115
116 facets = searchContext.getFacets();
117
118 for (Facet facet : facets.values()) {
119 if (facet.isStatic()) {
120 continue;
121 }
122
123 FacetConfiguration facetConfiguration =
124 facet.getFacetConfiguration();
125
126 if (facet instanceof MultiValueFacet) {
127 MultiValueFacetHandler multiValueFacetHandler =
128 new MultiValueFacetHandler(
129 facetConfiguration.getFieldName(),
130 facetConfiguration.getFieldName());
131
132 JSONObject dataJSONObject = facetConfiguration.getData();
133
134 if (dataJSONObject.has("maxTerms")) {
135 multiValueFacetHandler.setMaxItems(
136 dataJSONObject.getInt("maxTerms"));
137 }
138
139 facetHandlers.add(multiValueFacetHandler);
140 }
141 else if (facet instanceof RangeFacet) {
142 List<String> ranges = new ArrayList<String>();
143
144 JSONObject dataJSONObject = facetConfiguration.getData();
145
146 JSONArray rangesJSONArray = dataJSONObject.getJSONArray(
147 "ranges");
148
149 if (rangesJSONArray != null) {
150 for (int i = 0; i < rangesJSONArray.length(); i++) {
151 JSONObject rangeJSONObject =
152 rangesJSONArray.getJSONObject(i);
153
154 ranges.add(rangeJSONObject.getString("range"));
155 }
156 }
157
158 RangeFacetHandler rangeFacetHandler = new RangeFacetHandler(
159 facetConfiguration.getFieldName(),
160 facetConfiguration.getFieldName(), ranges);
161
162 rangeFacetHandler.setTermCountSize(TermCountSize.large);
163
164 facetHandlers.add(rangeFacetHandler);
165 }
166 else if (facet instanceof SimpleFacet) {
167 SimpleFacetHandler simpleFacetHandler =
168 new SimpleFacetHandler(
169 facetConfiguration.getFieldName(),
170 facetConfiguration.getFieldName());
171
172 facetHandlers.add(simpleFacetHandler);
173 }
174 }
175
176 BoboIndexReader boboIndexReader = BoboIndexReader.getInstance(
177 indexSearcher.getIndexReader(), facetHandlers);
178
179 SortField[] sortFields = new SortField[0];
180
181 Sort[] sorts = searchContext.getSorts();
182
183 if (sorts != null) {
184 sortFields = new SortField[sorts.length];
185
186 for (int i = 0; i < sorts.length; i++) {
187 Sort sort = sorts[i];
188
189 if ((sort.getType() == Sort.STRING_TYPE) &&
190 (searchContext.getLocale() != null)) {
191
192 sortFields[i] = new SortField(
193 sort.getFieldName(), searchContext.getLocale(),
194 sort.isReverse());
195 }
196 else {
197 sortFields[i] = new SortField(
198 sort.getFieldName(), sort.getType(),
199 sort.isReverse());
200 }
201 }
202 }
203
204 browseRequest = new BrowseRequest();
205
206 for (Facet facet : facets.values()) {
207 if (facet.isStatic()) {
208 continue;
209 }
210
211 FacetConfiguration facetConfiguration =
212 facet.getFacetConfiguration();
213
214 FacetSpec facetSpec = new FacetSpec();
215
216 facetSpec.setOrderBy(
217 FacetSortSpec.valueOf(facetConfiguration.getOrder()));
218
219 browseRequest.setFacetSpec(facet.getFieldName(), facetSpec);
220 }
221
222 int end = searchContext.getEnd();
223
224 if ((end == QueryUtil.ALL_POS) ||
225 (end > PropsValues.INDEX_SEARCH_LIMIT)) {
226
227 end = PropsValues.INDEX_SEARCH_LIMIT;
228 }
229
230 browseRequest.setCount(end);
231 browseRequest.setOffset(0);
232 browseRequest.setQuery(
233 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
234 query));
235 browseRequest.setSort(sortFields);
236
237 boboBrowser = new BoboBrowser(boboIndexReader);
238
239 long startTime = System.currentTimeMillis();
240
241 BrowseResult browseResult = boboBrowser.browse(browseRequest);
242
243 long endTime = System.currentTimeMillis();
244
245 float searchTime = (float)(endTime - startTime) / Time.SECOND;
246
247 hits = toHits(
248 indexSearcher, browseResult, query, startTime, searchTime,
249 searchContext.getStart(), searchContext.getEnd());
250
251 Map<String, FacetAccessible> facetMap = browseResult.getFacetMap();
252
253 for (Map.Entry<String, FacetAccessible> entry :
254 facetMap.entrySet()) {
255
256 Facet facet = facets.get(entry.getKey());
257
258 FacetAccessible facetAccessible = entry.getValue();
259
260 FacetCollector facetCollector = new BoboFacetCollector(
261 entry.getKey(), facetAccessible);
262
263 facet.setFacetCollector(facetCollector);
264 }
265 }
266 catch (BooleanQuery.TooManyClauses tmc) {
267 int maxClauseCount = BooleanQuery.getMaxClauseCount();
268
269 BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
270
271 try {
272 long startTime = System.currentTimeMillis();
273
274 BrowseResult browseResult = boboBrowser.browse(browseRequest);
275
276 long endTime = System.currentTimeMillis();
277
278 float searchTime = (float)(endTime - startTime) / Time.SECOND;
279
280 hits = toHits(
281 indexSearcher, browseResult, query, startTime, searchTime,
282 searchContext.getStart(), searchContext.getEnd());
283
284 Map<String, FacetAccessible> facetMap =
285 browseResult.getFacetMap();
286
287 for (Map.Entry<String, FacetAccessible> entry :
288 facetMap.entrySet()) {
289
290 Facet facet = facets.get(entry.getKey());
291
292 FacetAccessible facetAccessible = entry.getValue();
293
294 FacetCollector facetCollector = new BoboFacetCollector(
295 entry.getKey(), facetAccessible);
296
297 facet.setFacetCollector(facetCollector);
298 }
299 }
300 catch (Exception e) {
301 throw new SearchException(e);
302 }
303 finally {
304 BooleanQuery.setMaxClauseCount(maxClauseCount);
305 }
306 }
307 catch (ParseException pe) {
308 _log.error("Query " + query, pe);
309
310 return new HitsImpl();
311 }
312 catch (Exception e) {
313 throw new SearchException(e);
314 }
315 finally {
316 cleanUp(boboBrowser);
317
318 try {
319 LuceneHelperUtil.releaseIndexSearcher(
320 searchContext.getCompanyId(), indexSearcher);
321 }
322 catch (IOException ioe) {
323 _log.error("Unable to release searcher", ioe);
324 }
325 }
326
327 if (_log.isDebugEnabled()) {
328 _log.debug(
329 "Search found " + hits.getLength() + " results in " +
330 hits.getSearchTime() + "ms");
331 }
332
333 return hits;
334 }
335
336 protected void cleanUp(BoboBrowser boboBrowser) {
337 if (boboBrowser == null) {
338 return;
339 }
340
341 try {
342 boboBrowser.close();
343 }
344 catch (IOException ioe) {
345 _log.error(ioe, ioe);
346 }
347
348 Browsable[] browsables = boboBrowser.getSubBrowsers();
349
350 for (Browsable browsable : browsables) {
351 if (!(browsable instanceof BoboSubBrowser)) {
352 continue;
353 }
354
355 BoboSubBrowser boboSubBrowser = (BoboSubBrowser)browsable;
356
357 BoboIndexReader boboIndexReader = boboSubBrowser.getIndexReader();
358
359 try {
360 ThreadLocal<?> threadLocal =
361 (ThreadLocal<?>)_runtimeFacetDataMapField.get(
362 boboIndexReader);
363
364 threadLocal.remove();
365
366 _runtimeFacetDataMapField.set(boboIndexReader, null);
367 }
368 catch (Exception e) {
369 _log.error(
370 "Unable to clean up BoboIndexReader#_runtimeFacetDataMap",
371 e);
372 }
373
374 try {
375 ThreadLocal<?> threadLocal =
376 (ThreadLocal<?>)_runtimeFacetHandlerMapField.get(
377 boboIndexReader);
378
379 threadLocal.remove();
380
381 _runtimeFacetHandlerMapField.set(boboIndexReader, null);
382 }
383 catch (Exception e) {
384 _log.error(
385 "Unable to clean up BoboIndexReader#" +
386 "_runtimeFacetHandlerMap",
387 e);
388 }
389 }
390 }
391
392 protected DocumentImpl getDocument(
393 org.apache.lucene.document.Document oldDocument) {
394
395 DocumentImpl newDocument = new DocumentImpl();
396
397 List<org.apache.lucene.document.Fieldable> oldFieldables =
398 oldDocument.getFields();
399
400 for (org.apache.lucene.document.Fieldable oldFieldable :
401 oldFieldables) {
402
403 Field newField = null;
404
405 String[] values = oldDocument.getValues(oldFieldable.name());
406
407 if ((values != null) && (values.length > 1)) {
408 newField = new Field(oldFieldable.name(), values);
409 }
410 else {
411 newField = new Field(
412 oldFieldable.name(), oldFieldable.stringValue());
413 }
414
415 newField.setNumeric(oldFieldable instanceof NumericField);
416 newField.setTokenized(oldFieldable.isTokenized());
417
418 newDocument.add(newField);
419 }
420
421 return newDocument;
422 }
423
424 protected Set<String> getQueryTerms(Query query) {
425 Set<String> queryTerms = new HashSet<String>();
426
427 try {
428 queryTerms = LuceneHelperUtil.getQueryTerms(
429 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
430 query));
431 }
432 catch (ParseException pe) {
433 _log.error("Query " + query, pe);
434 }
435
436 return queryTerms;
437 }
438
439 protected String getSnippet(
440 org.apache.lucene.document.Document doc, Query query, String field,
441 Locale locale, Document hitDoc, Set<String> matchingTerms)
442 throws IOException {
443
444 String snippetField = DocumentImpl.getLocalizedName(locale, field);
445 String snippet = null;
446
447 try {
448 org.apache.lucene.search.Query luceneQuery =
449 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
450 query);
451
452 String[] values = doc.getValues(snippetField);
453
454 TermCollectingFormatter termCollectingFormatter =
455 new TermCollectingFormatter();
456
457 if (ArrayUtil.isNotEmpty(values)) {
458 snippet = LuceneHelperUtil.getSnippet(
459 luceneQuery, snippetField, StringUtil.merge(values),
460 termCollectingFormatter);
461 }
462
463 if (ArrayUtil.isEmpty(values) || Validator.isNull(snippet)) {
464 snippetField = field;
465
466 values = doc.getValues(snippetField);
467
468 if (ArrayUtil.isEmpty(values)) {
469 return StringPool.BLANK;
470 }
471
472 snippet = LuceneHelperUtil.getSnippet(
473 luceneQuery, field, StringUtil.merge(values),
474 termCollectingFormatter);
475 }
476
477 if (Validator.isNull(snippet)) {
478 return StringPool.BLANK;
479 }
480
481 matchingTerms.addAll(termCollectingFormatter.getTerms());
482 }
483 catch (ParseException pe) {
484 _log.error("Query " + query, pe);
485 }
486
487 hitDoc.addText(
488 Field.SNIPPET.concat(StringPool.UNDERLINE).concat(snippetField),
489 snippet);
490
491 return snippet;
492 }
493
494 protected Hits toHits(
495 IndexSearcher indexSearcher, BrowseResult browseResult, Query query,
496 long startTime, float searchTime, int start, int end)
497 throws IOException, ParseException {
498
499 int total = browseResult.getNumHits();
500
501 BrowseHit[] browseHits = browseResult.getHits();
502
503 if ((start == QueryUtil.ALL_POS) && (end == QueryUtil.ALL_POS)) {
504 start = 0;
505 end = total;
506 }
507
508 int[] startAndEnd = SearchPaginationUtil.calculateStartAndEnd(
509 start, end, total);
510
511 start = startAndEnd[0];
512 end = startAndEnd[1];
513
514 Set<String> queryTerms = new HashSet<String>();
515
516 IndexReader indexReader = indexSearcher.getIndexReader();
517
518 List<String> indexedFieldNames = new ArrayList<String> (
519 indexReader.getFieldNames(IndexReader.FieldOption.INDEXED));
520
521 org.apache.lucene.search.Query luceneQuery =
522 (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
523 query);
524
525 int scoredFieldNamesCount = LuceneHelperUtil.countScoredFieldNames(
526 luceneQuery, ArrayUtil.toStringArray(indexedFieldNames.toArray()));
527
528 Hits hits = new HitsImpl();
529
530 if ((start < 0) || (start > end)) {
531 return hits;
532 }
533
534 int subsetTotal = end - start;
535
536 if (subsetTotal > PropsValues.INDEX_SEARCH_LIMIT) {
537 subsetTotal = PropsValues.INDEX_SEARCH_LIMIT;
538 }
539
540 List<Document> subsetDocs = new ArrayList<Document>(subsetTotal);
541 List<Float> subsetScores = new ArrayList<Float>(subsetTotal);
542
543 FieldSelector fieldSelector = null;
544
545 QueryConfig queryConfig = query.getQueryConfig();
546
547 String[] selectedFieldNames = queryConfig.getSelectedFieldNames();
548
549 if (ArrayUtil.isNotEmpty(selectedFieldNames) &&
550 !selectedFieldNames[0].equals(Field.ANY)) {
551
552 fieldSelector = new SetBasedFieldSelector(
553 SetUtil.fromArray(selectedFieldNames),
554 Collections.<String>emptySet());
555 }
556
557 for (int i = start; i < start + subsetTotal; i++) {
558 int docId = browseHits[i].getDocid();
559
560 org.apache.lucene.document.Document document = indexSearcher.doc(
561 docId, fieldSelector);
562
563 Document subsetDocument = getDocument(document);
564
565 String[] highlightFieldNames = queryConfig.getHighlightFieldNames();
566
567 for (String highlightFieldName : highlightFieldNames) {
568 getSnippet(
569 document, query, highlightFieldName,
570 queryConfig.getLocale(), subsetDocument, queryTerms);
571 }
572
573 subsetDocs.add(subsetDocument);
574
575 Float subsetScore = browseHits[i].getScore();
576
577 if (scoredFieldNamesCount > 0) {
578 subsetScore = subsetScore / scoredFieldNamesCount;
579 }
580
581 subsetScores.add(subsetScore);
582
583 if (_log.isDebugEnabled()) {
584 try {
585 Explanation explanation = indexSearcher.explain(
586 luceneQuery, docId);
587
588 _log.debug(explanation.toString());
589 }
590 catch (Exception e) {
591 }
592 }
593 }
594
595 if (!queryConfig.isHighlightEnabled()) {
596 queryTerms = getQueryTerms(query);
597 }
598
599 hits.setDocs(subsetDocs.toArray(new Document[subsetDocs.size()]));
600 hits.setLength(total);
601 hits.setQuery(query);
602 hits.setQueryTerms(queryTerms.toArray(new String[queryTerms.size()]));
603 hits.setScores(ArrayUtil.toFloatArray(subsetScores));
604 hits.setSearchTime(searchTime);
605 hits.setStart(startTime);
606
607 return hits;
608 }
609
610 private static Log _log = LogFactoryUtil.getLog(LuceneIndexSearcher.class);
611
612 private static java.lang.reflect.Field _runtimeFacetDataMapField;
613 private static java.lang.reflect.Field _runtimeFacetHandlerMapField;
614
615 static {
616 try {
617 _runtimeFacetDataMapField = ReflectionUtil.getDeclaredField(
618 BoboIndexReader.class, "_runtimeFacetDataMap");
619 _runtimeFacetHandlerMapField = ReflectionUtil.getDeclaredField(
620 BoboIndexReader.class, "_runtimeFacetHandlerMap");
621 }
622 catch (Exception e) {
623 throw new ExceptionInInitializerError(e);
624 }
625 }
626
627 private class TermCollectingFormatter implements Formatter {
628
629 public Set<String> getTerms() {
630 return _terms;
631 }
632
633 @Override
634 public String highlightTerm(
635 String originalText, TokenGroup tokenGroup) {
636
637 if (tokenGroup.getTotalScore() > 0) {
638 _terms.add(originalText);
639 }
640
641 return originalText;
642 }
643
644 private Set<String> _terms = new HashSet<String>();
645
646 }
647
648 }