package com.liferay.portal.search.lucene;

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

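/**
 * Analyzer that emits the entire input as a single token and upper-cases it,
 * so a field can be matched as an exact keyword regardless of case.
 *
 * <p>A minimal usage sketch, assuming Lucene 3.x (where
 * <code>ReusableAnalyzerBase</code> and <code>KeywordTokenizer</code> live in
 * <code>org.apache.lucene.analysis</code>); the field name and input text are
 * illustrative only:</p>
 *
 * <pre>
 * Analyzer analyzer = new UpperCaseKeywordAnalyzer();
 *
 * TokenStream tokenStream = analyzer.reusableTokenStream(
 *     "screenName", new StringReader("liferay portal"));
 *
 * CharTermAttribute charTermAttribute = tokenStream.addAttribute(
 *     CharTermAttribute.class);
 *
 * tokenStream.reset();
 *
 * while (tokenStream.incrementToken()) {
 *
 *     // Prints a single token: "LIFERAY PORTAL"
 *
 *     System.out.println(charTermAttribute.toString());
 * }
 *
 * tokenStream.end();
 * tokenStream.close();
 * </pre>
 */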
public class UpperCaseKeywordAnalyzer extends ReusableAnalyzerBase {

	@Override
	protected TokenStreamComponents createComponents(
		String fieldName, Reader reader) {

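		// Tokenize the entire input as a single keyword token and route it
		// through the upper-casing filter
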
		Tokenizer tokenizer = new KeywordTokenizer(reader);

		return new TokenStreamComponents(
			tokenizer, new UpperCaseFilter(tokenizer));
	}
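	/**
	 * Token filter that upper-cases each incoming token in place. The
	 * conversion operates on code points, so characters outside the Basic
	 * Multilingual Plane are handled as well.
	 */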
	private class UpperCaseFilter extends TokenFilter {

		public UpperCaseFilter(TokenStream tokenStream) {
			super(tokenStream);
		}

		@Override
		public final boolean incrementToken() throws IOException {
			if (input.incrementToken()) {
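
				// Upper-case the current term in place; buffer() exposes the
				// term's backing char array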
				toUpperCase(
					_charTermAttribute.buffer(), 0,
					_charTermAttribute.length());

				return true;
			}

			return false;
		}

		protected void toUpperCase(
			char[] buffer, final int offset, final int limit) {

			assert (buffer.length >= limit);
			assert ((offset >= 0) && (offset <= buffer.length));

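			// Advance one code point at a time: Character.toChars writes the
			// upper-cased code point back into the buffer and returns the
			// number of chars it occupies, so surrogate pairs are converted
			// correctly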
			for (int i = offset; i < limit;) {
				i += Character.toChars(
					Character.toUpperCase(
						Character.codePointAt(buffer, i, limit)),
					buffer, i);
			}
		}

		private final CharTermAttribute _charTermAttribute = addAttribute(
			CharTermAttribute.class);

	}

}