| JerichoHTMLTextExtractor.java |
1 /**
2 * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3 *
4 * The contents of this file are subject to the terms of the Liferay Enterprise
5 * Subscription License ("License"). You may not use this file except in
6 * compliance with the License. You can obtain a copy of the License by
7 * contacting Liferay, Inc. See the License for the specific language governing
8 * permissions and limitations under the License, including but not limited to
9 * distribution rights of the Software.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17 * SOFTWARE.
18 */
19
20 package com.liferay.util.lucene;
21
22 import au.id.jericho.lib.html.Source;
23
24 import java.io.IOException;
25 import java.io.InputStream;
26 import java.io.Reader;
27 import java.io.StringReader;
28
29 import org.apache.jackrabbit.extractor.HTMLTextExtractor;
30
31 /**
32 * <a href="JerichoHTMLTextExtractor.java.html"><b><i>View Source</i></b></a>
33 *
34 * @author Brian Wing Shun Chan
35 *
36 */
37 public class JerichoHTMLTextExtractor extends HTMLTextExtractor {
38
39 public Reader extractText(InputStream stream, String type, String encoding)
40 throws IOException {
41
42 Source source = new Source(stream);
43
44 return new StringReader(source.getTextExtractor().toString());
45 }
46
47 }