1   /**
2    * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3    *
4    *
5    *
6    *
7    * The contents of this file are subject to the terms of the Liferay Enterprise
8    * Subscription License ("License"). You may not use this file except in
9    * compliance with the License. You can obtain a copy of the License by
10   * contacting Liferay, Inc. See the License for the specific language governing
11   * permissions and limitations under the License, including but not limited to
12   * distribution rights of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20   * SOFTWARE.
21   */
22  
23  package com.liferay.util;
24  
25  import java.io.IOException;
26  import java.io.Reader;
27  
28  import java.util.ArrayList;
29  import java.util.List;
30  
31  import javax.swing.text.MutableAttributeSet;
32  import javax.swing.text.html.HTML;
33  import javax.swing.text.html.HTMLEditorKit;
34  
/**
 * <a href="HTMLParser.java.html"><b><i>View Source</i></b></a>
 *
 * <p>
 * Parses an HTML document from a reader and collects the <code>href</code>
 * value of every anchor tag and the <code>src</code> value of every image tag
 * that the parser reports. Parsing happens eagerly in the constructor; the
 * collected values are afterwards available through {@link #getLinks()} and
 * {@link #getImages()}.
 * </p>
 *
 * @author Brian Wing Shun Chan
 *
 */
public class HTMLParser {

    /**
     * Parses the HTML supplied by the reader and records the links and images
     * found in it. This constructor does not close the reader itself.
     *
     * @param  reader the source of the HTML to parse
     * @throws IOException if the underlying parser fails while reading
     */
    public HTMLParser(Reader reader) throws IOException {
        HTMLEditorKit.Parser parser = new DefaultParser().getParser();

        // The last argument is the parser's ignoreCharSet flag.

        parser.parse(reader, new HTMLCallback(), true);
    }

    /**
     * Returns the <code>src</code> values of the image tags that were found,
     * in the order the parser reported them. The returned list is the
     * internal list, not a copy.
     *
     * @return the collected image sources, never <code>null</code>
     */
    public List<String> getImages() {
        return _images;
    }

    /**
     * Returns the <code>href</code> values of the anchor tags that were found,
     * in the order the parser reported them. The returned list is the
     * internal list, not a copy.
     *
     * @return the collected link targets, never <code>null</code>
     */
    public List<String> getLinks() {
        return _links;
    }

    /**
     * Records the attribute relevant to the given tag: <code>href</code> for
     * anchors and <code>src</code> for images. Any other tag, and any tag
     * missing the relevant attribute, is ignored.
     *
     * @param tag the tag reported by the parser
     * @param attributes the attributes reported for that tag
     */
    private void _collect(HTML.Tag tag, MutableAttributeSet attributes) {
        if (tag.equals(HTML.Tag.A)) {
            String href = (String)attributes.getAttribute(HTML.Attribute.HREF);

            if (href != null) {
                _links.add(href);
            }
        }
        else if (tag.equals(HTML.Tag.IMG)) {
            String src = (String)attributes.getAttribute(HTML.Attribute.SRC);

            if (src != null) {
                _images.add(src);
            }
        }
    }

    private final List<String> _images = new ArrayList<String>();
    private final List<String> _links = new ArrayList<String>();

    /**
     * Widens the visibility of {@link HTMLEditorKit#getParser()} so that the
     * enclosing class can obtain the parser. It needs no access to the
     * enclosing instance and is therefore a static nested class.
     */
    private static final class DefaultParser extends HTMLEditorKit {

        @Override
        public HTMLEditorKit.Parser getParser() {
            return super.getParser();
        }

    }

    /**
     * Parser callback that feeds both start tags and simple tags into the
     * shared collection logic of the enclosing instance. All other callbacks
     * are left at their inherited defaults.
     */
    private final class HTMLCallback extends HTMLEditorKit.ParserCallback {

        @Override
        public void handleStartTag(
            HTML.Tag tag, MutableAttributeSet attributes, int pos) {

            _collect(tag, attributes);
        }

        @Override
        public void handleSimpleTag(
            HTML.Tag tag, MutableAttributeSet attributes, int pos) {

            _collect(tag, attributes);
        }

    }

}