| JerichoHTMLTextExtractor.java |
1 /**
2 * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3 *
4 *
5 *
6 *
7 * The contents of this file are subject to the terms of the Liferay Enterprise
8 * Subscription License ("License"). You may not use this file except in
9 * compliance with the License. You can obtain a copy of the License by
10 * contacting Liferay, Inc. See the License for the specific language governing
11 * permissions and limitations under the License, including but not limited to
12 * distribution rights of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 */
22
23 package com.liferay.util.lucene;
24
25 import au.id.jericho.lib.html.Source;
26
27 import java.io.IOException;
28 import java.io.InputStream;
29 import java.io.Reader;
30 import java.io.StringReader;
31
32 import org.apache.jackrabbit.extractor.HTMLTextExtractor;
33
34 /**
35 * <a href="JerichoHTMLTextExtractor.java.html"><b><i>View Source</i></b></a>
36 *
37 * @author Brian Wing Shun Chan
38 */
39 public class JerichoHTMLTextExtractor extends HTMLTextExtractor {
40
41 public Reader extractText(InputStream stream, String type, String encoding)
42 throws IOException {
43
44 Source source = new Source(stream);
45
46 return new StringReader(source.getTextExtractor().toString());
47 }
48
49 }