public class HTMLParser
extends javax.swing.text.html.parser.Parser
Modifier and Type | Field and Description |
---|---|
private java.lang.String | content |
private java.net.URL | currentPage |
private static javax.swing.text.html.parser.DTD | HTMLDTD |
private java.lang.String[] | linkTypes |
private java.util.ArrayList | newURLs |
private int | state |
private static int | STATE_BODY |
private static int | STATE_OTHER |
private static int | STATE_TITLE |
private java.lang.String | title |
Fields inherited from interface javax.swing.text.html.parser.DTDConstants: ANY, CDATA, CONREF, CURRENT, DEFAULT, EMPTY, ENDTAG, ENTITIES, ENTITY, FIXED, GENERAL, ID, IDREF, IDREFS, IMPLIED, MD, MODEL, MS, NAME, NAMES, NMTOKEN, NMTOKENS, NOTATION, NUMBER, NUMBERS, NUTOKEN, NUTOKENS, PARAMETER, PI, PUBLIC, RCDATA, REQUIRED, SDATA, STARTTAG, SYSTEM
Constructor and Description |
---|
HTMLParser(javax.swing.text.html.parser.DTD dtd) |
HTMLParser(javax.swing.text.html.parser.DTD dtd, java.lang.String[] linkTypes) |
HTMLParser(java.lang.String[] linkTypes) |
Modifier and Type | Method and Description |
---|---|
java.lang.String | getContent() |
java.net.URL[] | getNewURLs() |
java.util.HashMap | getOccurances() |
java.lang.String | getTitle() |
protected void | handleEmptyTag(javax.swing.text.html.parser.TagElement tag) |
protected void | handleEndTag(javax.swing.text.html.parser.TagElement tag) |
private void | handleLink(java.lang.String url) |
protected void | handleStartTag(javax.swing.text.html.parser.TagElement tag) |
protected void | handleText(char[] data) |
void | parse(java.net.URL url, java.io.Reader in) |
private boolean | passLink(java.lang.String value) |
Methods inherited from class javax.swing.text.html.parser.Parser: endTag, error, error, error, error, flushAttributes, getAttributes, getCurrentLine, getCurrentPos, handleComment, handleEOFInComment, handleError, handleTitle, makeTag, makeTag, markFirstTime, parse, parseDTDMarkup, parseMarkupDeclarations, startTag
private java.lang.String[] linkTypes
private java.util.ArrayList newURLs
private int state
private static final int STATE_OTHER
private static final int STATE_TITLE
private static final int STATE_BODY
private java.lang.String content
private java.lang.String title
private java.net.URL currentPage
private static javax.swing.text.html.parser.DTD HTMLDTD
public HTMLParser(javax.swing.text.html.parser.DTD dtd)
public HTMLParser(javax.swing.text.html.parser.DTD dtd, java.lang.String[] linkTypes)
public HTMLParser(java.lang.String[] linkTypes)
public void parse(java.net.URL url, java.io.Reader in) throws java.io.IOException
Throws: java.io.IOException
public java.net.URL[] getNewURLs()
public java.util.HashMap getOccurances()
public java.lang.String getContent()
public java.lang.String getTitle()
private boolean passLink(java.lang.String value)
protected void handleStartTag(javax.swing.text.html.parser.TagElement tag)
Overrides: handleStartTag in class javax.swing.text.html.parser.Parser
protected void handleEmptyTag(javax.swing.text.html.parser.TagElement tag)
Overrides: handleEmptyTag in class javax.swing.text.html.parser.Parser
protected void handleEndTag(javax.swing.text.html.parser.TagElement tag)
Overrides: handleEndTag in class javax.swing.text.html.parser.Parser
protected void handleText(char[] data)
Overrides: handleText in class javax.swing.text.html.parser.Parser
private void handleLink(java.lang.String url)
Copyright 2004-2015 Wandora Team