001    /**
002     * Copyright (c) 2000-2012 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import com.browseengine.bobo.api.BoboBrowser;
018    import com.browseengine.bobo.api.BoboIndexReader;
019    import com.browseengine.bobo.api.Browsable;
020    import com.browseengine.bobo.api.BrowseHit;
021    import com.browseengine.bobo.api.BrowseRequest;
022    import com.browseengine.bobo.api.BrowseResult;
023    import com.browseengine.bobo.api.FacetAccessible;
024    import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
025    import com.browseengine.bobo.api.FacetSpec;
026    import com.browseengine.bobo.facets.FacetHandler.TermCountSize;
027    import com.browseengine.bobo.facets.FacetHandler;
028    import com.browseengine.bobo.facets.impl.MultiValueFacetHandler;
029    import com.browseengine.bobo.facets.impl.RangeFacetHandler;
030    import com.browseengine.bobo.facets.impl.SimpleFacetHandler;
031    
032    import com.liferay.portal.kernel.dao.orm.QueryUtil;
033    import com.liferay.portal.kernel.json.JSONArray;
034    import com.liferay.portal.kernel.json.JSONObject;
035    import com.liferay.portal.kernel.log.Log;
036    import com.liferay.portal.kernel.log.LogFactoryUtil;
037    import com.liferay.portal.kernel.search.BaseIndexSearcher;
038    import com.liferay.portal.kernel.search.Document;
039    import com.liferay.portal.kernel.search.DocumentImpl;
040    import com.liferay.portal.kernel.search.Field;
041    import com.liferay.portal.kernel.search.Hits;
042    import com.liferay.portal.kernel.search.HitsImpl;
043    import com.liferay.portal.kernel.search.ParseException;
044    import com.liferay.portal.kernel.search.Query;
045    import com.liferay.portal.kernel.search.QueryConfig;
046    import com.liferay.portal.kernel.search.QueryTranslatorUtil;
047    import com.liferay.portal.kernel.search.SearchContext;
048    import com.liferay.portal.kernel.search.SearchException;
049    import com.liferay.portal.kernel.search.Sort;
050    import com.liferay.portal.kernel.search.facet.Facet;
051    import com.liferay.portal.kernel.search.facet.MultiValueFacet;
052    import com.liferay.portal.kernel.search.facet.RangeFacet;
053    import com.liferay.portal.kernel.search.facet.SimpleFacet;
054    import com.liferay.portal.kernel.search.facet.collector.FacetCollector;
055    import com.liferay.portal.kernel.search.facet.config.FacetConfiguration;
056    import com.liferay.portal.kernel.util.ArrayUtil;
057    import com.liferay.portal.kernel.util.StringPool;
058    import com.liferay.portal.kernel.util.StringUtil;
059    import com.liferay.portal.kernel.util.Time;
060    import com.liferay.portal.kernel.util.Validator;
061    import com.liferay.portal.search.BoboFacetCollector;
062    import com.liferay.portal.util.PropsValues;
063    
064    import java.io.IOException;
065    
066    import java.util.ArrayList;
067    import java.util.HashSet;
068    import java.util.List;
069    import java.util.Locale;
070    import java.util.Map;
071    import java.util.Set;
072    
073    import org.apache.lucene.document.NumericField;
074    import org.apache.lucene.index.IndexReader;
075    import org.apache.lucene.search.BooleanQuery;
076    import org.apache.lucene.search.Explanation;
077    import org.apache.lucene.search.ScoreDoc;
078    import org.apache.lucene.search.SortField;
079    import org.apache.lucene.search.TopFieldDocs;
080    import org.apache.lucene.search.highlight.Formatter;
081    import org.apache.lucene.search.highlight.TokenGroup;
082    
083    /**
084     * @author Bruno Farache
085     */
086    public class LuceneIndexSearcher extends BaseIndexSearcher {
087    
088            public Hits search(SearchContext searchContext, Query query)
089                    throws SearchException {
090    
091                    if (_log.isDebugEnabled()) {
092                            _log.debug("Query " + query);
093                    }
094    
095                    Hits hits = null;
096    
097                    org.apache.lucene.search.IndexSearcher indexSearcher = null;
098                    Map<String, Facet> facets = null;
099                    BrowseRequest browseRequest = null;
100                    Browsable browsable = null;
101    
102                    try {
103                            indexSearcher = LuceneHelperUtil.getSearcher(
104                                    searchContext.getCompanyId(), true);
105    
106                            List<FacetHandler<?>> facetHandlers =
107                                    new ArrayList<FacetHandler<?>>();
108    
109                            facets = searchContext.getFacets();
110    
111                            for (Facet facet : facets.values()) {
112                                    if (facet.isStatic()) {
113                                            continue;
114                                    }
115    
116                                    FacetConfiguration facetConfiguration =
117                                            facet.getFacetConfiguration();
118    
119                                    if (facet instanceof MultiValueFacet) {
120                                            MultiValueFacetHandler multiValueFacetHandler =
121                                                    new MultiValueFacetHandler(
122                                                            facetConfiguration.getFieldName(),
123                                                            facetConfiguration.getFieldName());
124    
125                                            JSONObject dataJSONObject = facetConfiguration.getData();
126    
127                                            if (dataJSONObject.has("maxTerms")) {
128                                                    multiValueFacetHandler.setMaxItems(
129                                                            dataJSONObject.getInt("maxTerms"));
130                                            }
131    
132                                            facetHandlers.add(multiValueFacetHandler);
133                                    }
134                                    else if (facet instanceof RangeFacet) {
135                                            List<String> ranges = new ArrayList<String>();
136    
137                                            JSONObject dataJSONObject = facetConfiguration.getData();
138    
139                                            JSONArray rangesJSONArray = dataJSONObject.getJSONArray(
140                                                    "ranges");
141    
142                                            if (rangesJSONArray != null) {
143                                                    for (int i = 0; i < rangesJSONArray.length(); i++) {
144                                                            JSONObject rangeJSONObject =
145                                                                    rangesJSONArray.getJSONObject(i);
146    
147                                                            ranges.add(rangeJSONObject.getString("range"));
148                                                    }
149                                            }
150    
151                                            RangeFacetHandler rangeFacetHandler = new RangeFacetHandler(
152                                                    facetConfiguration.getFieldName(),
153                                                    facetConfiguration.getFieldName(), ranges);
154    
155                                            rangeFacetHandler.setTermCountSize(TermCountSize.large);
156    
157                                            facetHandlers.add(rangeFacetHandler);
158                                    }
159                                    else if (facet instanceof SimpleFacet) {
160                                            SimpleFacetHandler simpleFacetHandler =
161                                                    new SimpleFacetHandler(
162                                                            facetConfiguration.getFieldName(),
163                                                            facetConfiguration.getFieldName());
164    
165                                            facetHandlers.add(simpleFacetHandler);
166                                    }
167                            }
168    
169                            BoboIndexReader boboIndexReader = BoboIndexReader.getInstance(
170                                    indexSearcher.getIndexReader(), facetHandlers);
171    
172                            SortField[] sortFields = new SortField[0];
173    
174                            Sort[] sorts = searchContext.getSorts();
175    
176                            if (sorts != null) {
177                                    sortFields = new SortField[sorts.length];
178    
179                                    for (int i = 0; i < sorts.length; i++) {
180                                            Sort sort = sorts[i];
181    
182                                            sortFields[i] = new SortField(
183                                                    sort.getFieldName(), sort.getType(), sort.isReverse());
184                                    }
185                            }
186    
187                            browseRequest = new BrowseRequest();
188    
189                            for (Facet facet : facets.values()) {
190                                    if (facet.isStatic()) {
191                                            continue;
192                                    }
193    
194                                    FacetConfiguration facetConfiguration =
195                                            facet.getFacetConfiguration();
196    
197                                    FacetSpec facetSpec = new FacetSpec();
198    
199                                    facetSpec.setOrderBy(
200                                            FacetSortSpec.valueOf(facetConfiguration.getOrder()));
201    
202                                    browseRequest.setFacetSpec(facet.getFieldName(), facetSpec);
203                            }
204    
205                            browseRequest.setCount(PropsValues.INDEX_SEARCH_LIMIT);
206                            browseRequest.setOffset(0);
207                            browseRequest.setQuery(
208                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
209                                            query));
210                            browseRequest.setSort(sortFields);
211    
212                            browsable = new BoboBrowser(boboIndexReader);
213    
214                            long startTime = System.currentTimeMillis();
215    
216                            BrowseResult browseResult = browsable.browse(browseRequest);
217    
218                            BrowseHit[] browseHits = browseResult.getHits();
219    
220                            long endTime = System.currentTimeMillis();
221    
222                            float searchTime = (float)(endTime - startTime) / Time.SECOND;
223    
224                            hits = toHits(
225                                    indexSearcher, new HitDocs(browseHits), query, startTime,
226                                    searchTime, searchContext.getStart(), searchContext.getEnd());
227    
228                            Map<String, FacetAccessible> facetMap = browseResult.getFacetMap();
229    
230                            for (Map.Entry<String, FacetAccessible> entry :
231                                            facetMap.entrySet()) {
232    
233                                    Facet facet = facets.get(entry.getKey());
234    
235                                    FacetAccessible facetAccessible = entry.getValue();
236    
237                                    FacetCollector facetCollector = new BoboFacetCollector(
238                                            entry.getKey(), facetAccessible);
239    
240                                    facet.setFacetCollector(facetCollector);
241                            }
242                    }
243                    catch (BooleanQuery.TooManyClauses tmc) {
244                            int maxClauseCount = BooleanQuery.getMaxClauseCount();
245    
246                            BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
247    
248                            try {
249                                    long startTime = System.currentTimeMillis();
250    
251                                    BrowseResult result = browsable.browse(browseRequest);
252    
253                                    BrowseHit[] browseHits = result.getHits();
254    
255                                    long endTime = System.currentTimeMillis();
256    
257                                    float searchTime = (float)(endTime - startTime) / Time.SECOND;
258    
259                                    hits = toHits(
260                                            indexSearcher, new HitDocs(browseHits), query, startTime,
261                                            searchTime, searchContext.getStart(),
262                                            searchContext.getEnd());
263    
264                                    Map<String, FacetAccessible> facetMap = result.getFacetMap();
265    
266                                    for (Map.Entry<String, FacetAccessible> entry :
267                                                    facetMap.entrySet()) {
268    
269                                            Facet facet = facets.get(entry.getKey());
270    
271                                            FacetAccessible facetAccessible = entry.getValue();
272    
273                                            FacetCollector facetCollector = new BoboFacetCollector(
274                                                    entry.getKey(), facetAccessible);
275    
276                                            facet.setFacetCollector(facetCollector);
277                                    }
278                            }
279                            catch (Exception e) {
280                                    throw new SearchException(e);
281                            }
282                            finally {
283                                    BooleanQuery.setMaxClauseCount(maxClauseCount);
284                            }
285                    }
286                    catch (ParseException pe) {
287                            _log.error("Query " + query, pe);
288    
289                            return new HitsImpl();
290                    }
291                    catch (Exception e) {
292                            throw new SearchException(e);
293                    }
294                    finally {
295                            close(browsable);
296    
297                            if (indexSearcher != null) {
298                                    try {
299                                            indexSearcher.close();
300                                    }
301                                    catch (IOException ioe) {
302                                            _log.error(ioe, ioe);
303                                    }
304                            }
305                    }
306    
307                    if (_log.isDebugEnabled()) {
308                            _log.debug(
309                                    "Search found " + hits.getLength() + " results in " +
310                                            hits.getSearchTime() + "ms");
311                    }
312    
313                    return hits;
314            }
315    
316            public Hits search(
317                            String searchEngineId, long companyId, Query query, Sort[] sorts,
318                            int start, int end)
319                    throws SearchException {
320    
321                    if (_log.isDebugEnabled()) {
322                            _log.debug("Query " + query);
323                    }
324    
325                    Hits hits = null;
326    
327                    org.apache.lucene.search.IndexSearcher indexSearcher = null;
328                    org.apache.lucene.search.Sort luceneSort = null;
329    
330                    try {
331                            indexSearcher = LuceneHelperUtil.getSearcher(companyId, true);
332    
333                            if (sorts != null) {
334                                    SortField[] sortFields = new SortField[sorts.length];
335    
336                                    for (int i = 0; i < sorts.length; i++) {
337                                            Sort sort = sorts[i];
338    
339                                            sortFields[i] = new SortField(
340                                                    sort.getFieldName(), sort.getType(), sort.isReverse());
341                                    }
342    
343                                    luceneSort = new org.apache.lucene.search.Sort(sortFields);
344                            }
345                            else {
346                                    luceneSort = new org.apache.lucene.search.Sort();
347                            }
348    
349                            long startTime = System.currentTimeMillis();
350    
351                            TopFieldDocs topFieldDocs = indexSearcher.search(
352                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
353                                            query),
354                                    null, PropsValues.INDEX_SEARCH_LIMIT, luceneSort);
355    
356                            long endTime = System.currentTimeMillis();
357    
358                            float searchTime = (float)(endTime - startTime) / Time.SECOND;
359    
360                            hits = toHits(
361                                    indexSearcher, new HitDocs(topFieldDocs), query, startTime,
362                                    searchTime, start, end);
363                    }
364                    catch (BooleanQuery.TooManyClauses tmc) {
365                            int maxClauseCount = BooleanQuery.getMaxClauseCount();
366    
367                            BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
368    
369                            try {
370                                    long startTime = System.currentTimeMillis();
371    
372                                    TopFieldDocs topFieldDocs = indexSearcher.search(
373                                            (org.apache.lucene.search.Query)
374                                                    QueryTranslatorUtil.translate(query),
375                                            null, PropsValues.INDEX_SEARCH_LIMIT, luceneSort);
376    
377                                    long endTime = System.currentTimeMillis();
378    
379                                    float searchTime = (float)(endTime - startTime) / Time.SECOND;
380    
381                                    hits = toHits(
382                                            indexSearcher, new HitDocs(topFieldDocs), query, startTime,
383                                            searchTime, start, end);
384                            }
385                            catch (Exception e) {
386                                    throw new SearchException(e);
387                            }
388                            finally {
389                                    BooleanQuery.setMaxClauseCount(maxClauseCount);
390                            }
391                    }
392                    catch (ParseException pe) {
393                            _log.error("Query " + query, pe);
394    
395                            return new HitsImpl();
396                    }
397                    catch (Exception e) {
398                            throw new SearchException(e);
399                    }
400                    finally {
401                            if (indexSearcher != null) {
402                                    try {
403                                            indexSearcher.close();
404                                    }
405                                    catch (IOException ioe) {
406                                            _log.error(ioe, ioe);
407                                    }
408                            }
409                    }
410    
411                    if (_log.isDebugEnabled()) {
412                            _log.debug(
413                                    "Search found " + hits.getLength() + " results in " +
414                                            hits.getSearchTime() + "ms");
415                    }
416    
417                    return hits;
418            }
419    
420            @SuppressWarnings("deprecation")
421            protected void close(Browsable browsable) {
422                    if (browsable != null) {
423                            try {
424                                    browsable.close();
425                            }
426                            catch (IOException ioe) {
427                                    _log.error(ioe, ioe);
428                            }
429                    }
430            }
431    
432            protected DocumentImpl getDocument(
433                    org.apache.lucene.document.Document oldDocument) {
434    
435                    DocumentImpl newDocument = new DocumentImpl();
436    
437                    List<org.apache.lucene.document.Fieldable> oldFieldables =
438                            oldDocument.getFields();
439    
440                    for (org.apache.lucene.document.Fieldable oldFieldable :
441                                    oldFieldables) {
442    
443                            Field newField = null;
444    
445                            String[] values = oldDocument.getValues(oldFieldable.name());
446    
447                            if ((values != null) && (values.length > 1)) {
448                                    newField = new Field(oldFieldable.name(), values);
449                            }
450                            else {
451                                    newField = new Field(
452                                            oldFieldable.name(), oldFieldable.stringValue());
453                            }
454    
455                            newField.setNumeric(oldFieldable instanceof NumericField);
456                            newField.setTokenized(oldFieldable.isTokenized());
457    
458                            newDocument.add(newField);
459                    }
460    
461                    return newDocument;
462            }
463    
464            protected Set<String> getQueryTerms(Query query) {
465                    Set<String> queryTerms = new HashSet<String>();
466    
467                    try {
468                            queryTerms = LuceneHelperUtil.getQueryTerms(
469                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
470                                            query));
471                    }
472                    catch (ParseException pe) {
473                            _log.error("Query " + query, pe);
474                    }
475    
476                    return queryTerms;
477            }
478    
479            protected String getSnippet(
480                            org.apache.lucene.document.Document doc, Query query, String field,
481                            Locale locale, Document hitDoc, Set<String> matchingTerms)
482                    throws IOException {
483    
484                    String snippetField = DocumentImpl.getLocalizedName(locale, field);
485                    String snippet = null;
486    
487                    try {
488                            org.apache.lucene.search.Query luceneQuery =
489                                    (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
490                                            query);
491    
492                            String[] values = doc.getValues(snippetField);
493    
494                            TermCollectingFormatter termCollectingFormatter =
495                                    new TermCollectingFormatter();
496    
497                            if ((values != null) && (values.length > 0)) {
498                                    snippet = LuceneHelperUtil.getSnippet(
499                                            luceneQuery, snippetField, StringUtil.merge(values),
500                                            termCollectingFormatter);
501                            }
502    
503                            if ((values == null) || (values.length == 0) ||
504                                    Validator.isNull(snippet)) {
505    
506                                    snippetField = field;
507    
508                                    values = doc.getValues(snippetField);
509    
510                                    if (Validator.isNull(values)) {
511                                            return StringPool.BLANK;
512                                    }
513    
514                                    snippet = LuceneHelperUtil.getSnippet(
515                                            luceneQuery, field, StringUtil.merge(values),
516                                            termCollectingFormatter);
517                            }
518    
519                            if (Validator.isNull(snippet)) {
520                                    return StringPool.BLANK;
521                            }
522    
523                            matchingTerms.addAll(termCollectingFormatter.getTerms());
524                    }
525                    catch (ParseException pe) {
526                            _log.error("Query " + query, pe);
527                    }
528    
529                    hitDoc.addText(
530                            Field.SNIPPET.concat(StringPool.UNDERLINE).concat(snippetField),
531                            snippet);
532    
533                    return snippet;
534            }
535    
536            protected Hits toHits(
537                            org.apache.lucene.search.IndexSearcher indexSearcher,
538                            HitDocs hitDocs, Query query, long startTime, float searchTime,
539                            int start, int end)
540                    throws IOException, ParseException {
541    
542                    int length = hitDocs.getTotalHits();
543    
544                    if ((start == QueryUtil.ALL_POS) && (end == QueryUtil.ALL_POS)) {
545                            start = 0;
546                            end = length;
547                    }
548    
549                    Set<String> queryTerms = new HashSet<String>();
550    
551                    IndexReader indexReader = indexSearcher.getIndexReader();
552    
553                    List<String> indexedFieldNames = new ArrayList<String> (
554                            indexReader.getFieldNames(IndexReader.FieldOption.INDEXED));
555    
556                    org.apache.lucene.search.Query luceneQuery =
557                            (org.apache.lucene.search.Query)QueryTranslatorUtil.translate(
558                                    query);
559    
560                    int scoredFieldNamesCount = LuceneHelperUtil.countScoredFieldNames(
561                            luceneQuery, ArrayUtil.toStringArray(indexedFieldNames.toArray()));
562    
563                    Hits hits = new HitsImpl();
564    
565                    if ((start > -1) && (start <= end)) {
566                            if (end > length) {
567                                    end = length;
568                            }
569    
570                            if (start > end) {
571                                    start = end;
572                            }
573    
574                            int subsetTotal = end - start;
575    
576                            if (subsetTotal > PropsValues.INDEX_SEARCH_LIMIT) {
577                                    subsetTotal = PropsValues.INDEX_SEARCH_LIMIT;
578                            }
579    
580                            List<Document> subsetDocs = new ArrayList<Document>(subsetTotal);
581                            List<Float> subsetScores = new ArrayList<Float>(subsetTotal);
582    
583                            QueryConfig queryConfig = query.getQueryConfig();
584    
585                            for (int i = start; i < start + subsetTotal; i++) {
586                                    int docId = hitDocs.getDocId(i);
587    
588                                    org.apache.lucene.document.Document document =
589                                            indexSearcher.doc(docId);
590    
591                                    Document subsetDocument = getDocument(document);
592    
593                                    if (queryConfig.isHighlightEnabled()) {
594                                            Locale locale = queryConfig.getLocale();
595    
596                                            getSnippet(
597                                                    document, query, Field.CONTENT, locale, subsetDocument,
598                                                    queryTerms);
599                                            getSnippet(
600                                                    document, query, Field.DESCRIPTION, locale,
601                                                    subsetDocument, queryTerms);
602                                            getSnippet(
603                                                    document, query, Field.TITLE, locale, subsetDocument,
604                                                    queryTerms);
605                                    }
606    
607                                    subsetDocs.add(subsetDocument);
608    
609                                    Float subsetScore = hitDocs.getScore(i);
610    
611                                    if (scoredFieldNamesCount > 0) {
612                                            subsetScore = subsetScore / scoredFieldNamesCount;
613                                    }
614    
615                                    subsetScores.add(subsetScore);
616    
617                                    if (_log.isDebugEnabled()) {
618                                            try {
619                                                    Explanation explanation = indexSearcher.explain(
620                                                            luceneQuery, docId);
621    
622                                                    _log.debug(explanation.toString());
623                                            }
624                                            catch (Exception e) {
625                                            }
626                                    }
627                            }
628    
629                            if (!queryConfig.isHighlightEnabled()) {
630                                    queryTerms = getQueryTerms(query);
631                            }
632    
633                            hits.setDocs(subsetDocs.toArray(new Document[subsetDocs.size()]));
634                            hits.setLength(length);
635                            hits.setQuery(query);
636                            hits.setQueryTerms(
637                                    queryTerms.toArray(new String[queryTerms.size()]));
638                            hits.setScores(
639                                    subsetScores.toArray(new Float[subsetScores.size()]));
640                            hits.setSearchTime(searchTime);
641                            hits.setStart(startTime);
642                    }
643    
644                    return hits;
645            }
646    
647            private static Log _log = LogFactoryUtil.getLog(LuceneIndexSearcher.class);
648    
649            private class HitDocs {
650    
651                    public HitDocs(BrowseHit[] browseHits) {
652                            _browseHits = browseHits;
653                    }
654    
655                    public HitDocs(TopFieldDocs topFieldDocs) {
656                            _topFieldDocs = topFieldDocs;
657                    }
658    
659                    public int getDocId(int i) {
660                            if (_topFieldDocs != null) {
661                                    ScoreDoc scoreDoc = _topFieldDocs.scoreDocs[i];
662    
663                                    return scoreDoc.doc;
664                            }
665                            else if (_browseHits != null) {
666                                    return _browseHits[i].getDocid();
667                            }
668    
669                            throw new IllegalStateException();
670                    }
671    
672                    public float getScore(int i) {
673                            if (_topFieldDocs != null) {
674                                    ScoreDoc scoreDoc = _topFieldDocs.scoreDocs[i];
675    
676                                    return scoreDoc.score;
677                            }
678                            else if (_browseHits != null) {
679                                    return _browseHits[i].getScore();
680                            }
681    
682                            throw new IllegalStateException();
683                    }
684    
685                    public int getTotalHits() {
686                            if (_topFieldDocs != null) {
687                                    return _topFieldDocs.totalHits;
688                            }
689                            else if (_browseHits != null) {
690                                    return _browseHits.length;
691                            }
692    
693                            throw new IllegalStateException();
694                    }
695    
696                    private BrowseHit[] _browseHits;
697                    private TopFieldDocs _topFieldDocs;
698    
699            }
700    
701            private class TermCollectingFormatter implements Formatter {
702    
703                    public Set<String> getTerms() {
704                            return _terms;
705                    }
706    
707                    public String highlightTerm(
708                            String originalText, TokenGroup tokenGroup) {
709    
710                            if (tokenGroup.getTotalScore() > 0) {
711                                    _terms.add(originalText);
712                            }
713    
714                            return originalText;
715                    }
716    
717                    private Set<String> _terms = new HashSet<String>();
718    
719            }
720    
721    }