001    /**
002     * Copyright (c) 2000-present Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.portal.search.lucene;
016    
017    import java.io.IOException;
018    import java.io.Reader;
019    
020    import org.apache.lucene.analysis.KeywordTokenizer;
021    import org.apache.lucene.analysis.ReusableAnalyzerBase;
022    import org.apache.lucene.analysis.TokenFilter;
023    import org.apache.lucene.analysis.TokenStream;
024    import org.apache.lucene.analysis.Tokenizer;
025    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
026    
027    /**
028     * @author Tibor Lipusz
029     * @author Norbert Kocsis
030     */
031    public class UpperCaseKeywordAnalyzer extends ReusableAnalyzerBase {
032    
033            @Override
034            protected TokenStreamComponents createComponents(
035                    String fieldName, Reader reader) {
036    
037                    Tokenizer tokenizer = new KeywordTokenizer(reader);
038    
039                    return new TokenStreamComponents(
040                            tokenizer, new UpperCaseFilter(tokenizer));
041            }
042    
043            /**
044             * {@link https://issues.apache.org/jira/browse/LUCENE-5369}
045             * {@link https://github.com/apache/lucene-solr/blob/lucene_solr_4_7_0/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java}
046             * {@link https://github.com/apache/lucene-solr/blob/lucene_solr_4_7_0/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java}
047             */
048            private class UpperCaseFilter extends TokenFilter {
049    
050                    public UpperCaseFilter(TokenStream tokenStream) {
051                            super(tokenStream);
052                    }
053    
054                    @Override
055                    public final boolean incrementToken() throws IOException {
056                            if (input.incrementToken()) {
057                                    toUpperCase(
058                                            _charTermAttribute.buffer(), 0,
059                                            _charTermAttribute.length());
060    
061                                    return true;
062                            }
063    
064                            return false;
065                    }
066    
067                    protected void toUpperCase(
068                            char[] buffer, final int offset, final int limit) {
069    
070                            assert (buffer.length >= limit);
071                            assert ((offset <= 0) && (offset <= buffer.length));
072    
073                            for (int i = offset; i < limit;) {
074                                    i +=
075                                            Character.toChars(
076                                                    Character.toUpperCase(
077                                                            Character.codePointAt(buffer, i, limit)),
078                                                    buffer, i);
079                            }
080                    }
081    
082                    private final CharTermAttribute _charTermAttribute = addAttribute(
083                            CharTermAttribute.class);
084    
085            }
086    
087    }