001    /**
002     * Copyright (c) 2000-present Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.util;
016    
017    import java.io.IOException;
018    import java.io.Reader;
019    
020    import java.util.ArrayList;
021    import java.util.List;
022    
023    import javax.swing.text.MutableAttributeSet;
024    import javax.swing.text.html.HTML;
025    import javax.swing.text.html.HTMLEditorKit;
026    
/**
 * Extracts the link and image URLs from an HTML document.
 *
 * <p>
 * The document is parsed once, in the constructor, with the Swing HTML parser
 * obtained from {@link HTMLEditorKit}. The <code>href</code> value of every
 * <code>a</code> tag and the <code>src</code> value of every <code>img</code>
 * tag reported by the parser is appended to the corresponding list in the
 * order the parser reports it.
 * </p>
 *
 * @author Brian Wing Shun Chan
 */
public class HTMLParser {

	/**
	 * Parses the HTML supplied by the reader and collects its links and images.
	 * This constructor does not itself close the reader.
	 *
	 * @param  reader the source of the HTML markup
	 * @throws IOException if the parser fails while reading the markup
	 */
	public HTMLParser(Reader reader) throws IOException {
		HTMLEditorKit.Parser parser = new DefaultParser().getParser();

		// The third argument asks the parser to ignore any charset declared
		// inside the document; the reader has already decoded the characters.

		parser.parse(reader, new HTMLCallback(), true);
	}

	/**
	 * Returns the <code>src</code> values collected from <code>img</code> tags.
	 *
	 * @return the live, mutable list of image sources; never <code>null</code>
	 */
	public List<String> getImages() {
		return _images;
	}

	/**
	 * Returns the <code>href</code> values collected from <code>a</code> tags.
	 *
	 * @return the live, mutable list of link targets; never <code>null</code>
	 */
	public List<String> getLinks() {
		return _links;
	}

	private final List<String> _images = new ArrayList<>();
	private final List<String> _links = new ArrayList<>();

	/**
	 * Widens the visibility of {@link HTMLEditorKit#getParser()}, which is
	 * protected in the superclass. The class is static because it never uses
	 * the enclosing {@link HTMLParser} instance.
	 */
	private static class DefaultParser extends HTMLEditorKit {

		@Override
		public HTMLEditorKit.Parser getParser() {
			return super.getParser();
		}

	}

	/**
	 * Parser callback that records link and image URLs on the enclosing
	 * {@link HTMLParser}. Only the tag callbacks are overridden; the remaining
	 * callbacks keep the no-op behavior inherited from
	 * {@link HTMLEditorKit.ParserCallback}.
	 */
	private class HTMLCallback extends HTMLEditorKit.ParserCallback {

		@Override
		public void handleSimpleTag(
			HTML.Tag tag, MutableAttributeSet attributes, int pos) {

			_collect(tag, attributes);
		}

		@Override
		public void handleStartTag(
			HTML.Tag tag, MutableAttributeSet attributes, int pos) {

			_collect(tag, attributes);
		}

		/**
		 * Appends the value of the given attribute to the target list when the
		 * attribute is present and holds a string.
		 */
		private void _addValue(
			MutableAttributeSet attributes, HTML.Attribute attribute,
			List<String> values) {

			Object value = attributes.getAttribute(attribute);

			// instanceof also rejects null, so absent attributes are skipped

			if (value instanceof String) {
				values.add((String)value);
			}
		}

		/**
		 * Records the URL carried by an anchor or image tag. The same logic
		 * serves both tag callbacks so they cannot drift apart.
		 */
		private void _collect(HTML.Tag tag, MutableAttributeSet attributes) {
			if (HTML.Tag.A.equals(tag)) {
				_addValue(attributes, HTML.Attribute.HREF, _links);
			}
			else if (HTML.Tag.IMG.equals(tag)) {
				_addValue(attributes, HTML.Attribute.SRC, _images);
			}
		}

	}

}