private static class MediaWikiExtractor.WikiParser
extends java.lang.Object
implements org.xml.sax.ContentHandler, org.xml.sax.ErrorHandler
| Modifier and Type | Field and Description |
|---|---|
| `static java.lang.String` | `CONTRIBUTOR_SI` |
| `private java.lang.String` | `data_base` |
| `private java.lang.String` | `data_contributorid` |
| `private java.util.HashSet<java.lang.String>` | `data_contributors` |
| `private int` | `data_latestrevision` |
| `private java.lang.String` | `data_latesttext` |
| `private java.lang.String` | `data_latesttimestamp` |
| `private java.lang.String` | `data_namespace` |
| `private java.util.Vector<java.lang.String>` | `data_namespaces` |
| `private java.lang.String` | `data_pageid` |
| `private java.lang.String` | `data_restrictions` |
| `private java.lang.String` | `data_revisionid` |
| `private java.lang.String` | `data_sitename` |
| `private java.lang.String` | `data_text` |
| `private java.lang.String` | `data_timestamp` |
| `private java.lang.String` | `data_title` |
| `private java.lang.String` | `data_username` |
| `static java.lang.String` | `PAGE_SI` |
| `private MediaWikiExtractor` | `parent` |
| `static java.lang.String` | `REDIRECT_FROM_SI` |
| `static java.lang.String` | `REDIRECT_SI` |
| `static java.lang.String` | `REDIRECT_TO_SI` |
| `static java.util.regex.Pattern` | `redirectPattern` |
| `static java.lang.String` | `SIPREFIX` |
| `private int` | `state` |
| `private static int` | `STATE_BASE` |
| `private static int` | `STATE_COMMENT` |
| `private static int` | `STATE_CONTRIBUTOR` |
| `private static int` | `STATE_CONTRIBUTORID` |
| `private static int` | `STATE_MEDIAWIKI` |
| `private static int` | `STATE_NAMESPACE` |
| `private static int` | `STATE_NAMESPACES` |
| `private static int` | `STATE_PAGE` |
| `private static int` | `STATE_PAGEID` |
| `private static int` | `STATE_RESTRICTIONS` |
| `private static int` | `STATE_REVISION` |
| `private static int` | `STATE_REVISIONID` |
| `private static int` | `STATE_SITEINFO` |
| `private static int` | `STATE_SITENAME` |
| `private static int` | `STATE_START` |
| `private static int` | `STATE_TEXT` |
| `private static int` | `STATE_TIMESTAMP` |
| `private static int` | `STATE_TITLE` |
| `private static int` | `STATE_USERNAME` |
| `static java.lang.String` | `TAG_BASE` |
| `static java.lang.String` | `TAG_CASE` |
| `static java.lang.String` | `TAG_COMMENT` |
| `static java.lang.String` | `TAG_CONTRIBUTOR` |
| `static java.lang.String` | `TAG_CONTRIBUTORID` |
| `static java.lang.String` | `TAG_GENERATOR` |
| `static java.lang.String` | `TAG_MEDIAWIKI` |
| `static java.lang.String` | `TAG_NAMESPACE` |
| `static java.lang.String` | `TAG_NAMESPACES` |
| `static java.lang.String` | `TAG_PAGE` |
| `static java.lang.String` | `TAG_PAGEID` |
| `static java.lang.String` | `TAG_RESTRICTIONS` |
| `static java.lang.String` | `TAG_REVISION` |
| `static java.lang.String` | `TAG_REVISIONID` |
| `static java.lang.String` | `TAG_SITEINFO` |
| `static java.lang.String` | `TAG_SITENAME` |
| `static java.lang.String` | `TAG_TEXT` |
| `static java.lang.String` | `TAG_TIMESTAMP` |
| `static java.lang.String` | `TAG_TITLE` |
| `static java.lang.String` | `TAG_USERNAME` |
| `static java.lang.String` | `TEXT_SI` |
| `static java.lang.String` | `TIMESTAMP_SI` |
| `private TopicMap` | `tm` |
| `private java.lang.String` | `url` |
| `static java.lang.String` | `WIKI_SI` |
| Constructor and Description |
|---|
| `WikiParser(java.lang.String wikiUrl, TopicMap tm, MediaWikiExtractor parent)` |
| Modifier and Type | Method and Description |
|---|---|
| `void` | `characters(char[] ch, int start, int length)` |
| `void` | `endDocument()` |
| `void` | `endElement(java.lang.String uri, java.lang.String localName, java.lang.String qName)` |
| `void` | `endPrefixMapping(java.lang.String prefix)` |
| `void` | `error(org.xml.sax.SAXParseException exception)` |
| `void` | `fatalError(org.xml.sax.SAXParseException exception)` |
| `private Topic` | `getOrCreateTopic(java.lang.String si)` |
| `private Topic` | `getOrCreateTopic(java.lang.String si, java.lang.String bn)` |
| `void` | `ignorableWhitespace(char[] ch, int start, int length)` |
| `void` | `processingInstruction(java.lang.String target, java.lang.String data)` |
| `void` | `setDocumentLocator(org.xml.sax.Locator locator)` |
| `void` | `skippedEntity(java.lang.String name)` |
| `void` | `startDocument()` |
| `void` | `startElement(java.lang.String uri, java.lang.String localName, java.lang.String qName, org.xml.sax.Attributes atts)` |
| `void` | `startPrefixMapping(java.lang.String prefix, java.lang.String uri)` |
| `void` | `warning(org.xml.sax.SAXParseException exception)` |
private java.lang.String url
private TopicMap tm
private MediaWikiExtractor parent
public static final java.util.regex.Pattern redirectPattern
public static final java.lang.String TAG_MEDIAWIKI
public static final java.lang.String TAG_SITEINFO
public static final java.lang.String TAG_SITENAME
public static final java.lang.String TAG_BASE
public static final java.lang.String TAG_GENERATOR
public static final java.lang.String TAG_CASE
public static final java.lang.String TAG_NAMESPACES
public static final java.lang.String TAG_NAMESPACE
public static final java.lang.String TAG_PAGE
public static final java.lang.String TAG_TITLE
public static final java.lang.String TAG_PAGEID
public static final java.lang.String TAG_RESTRICTIONS
public static final java.lang.String TAG_REVISION
public static final java.lang.String TAG_REVISIONID
public static final java.lang.String TAG_TIMESTAMP
public static final java.lang.String TAG_CONTRIBUTOR
public static final java.lang.String TAG_COMMENT
public static final java.lang.String TAG_USERNAME
public static final java.lang.String TAG_CONTRIBUTORID
public static final java.lang.String TAG_TEXT
private static final int STATE_START
private static final int STATE_MEDIAWIKI
private static final int STATE_SITEINFO
private static final int STATE_SITENAME
private static final int STATE_BASE
private static final int STATE_NAMESPACES
private static final int STATE_NAMESPACE
private static final int STATE_PAGE
private static final int STATE_TITLE
private static final int STATE_PAGEID
private static final int STATE_RESTRICTIONS
private static final int STATE_REVISION
private static final int STATE_REVISIONID
private static final int STATE_TIMESTAMP
private static final int STATE_CONTRIBUTOR
private static final int STATE_COMMENT
private static final int STATE_USERNAME
private static final int STATE_CONTRIBUTORID
private static final int STATE_TEXT
private int state
public static java.lang.String SIPREFIX
public static java.lang.String CONTRIBUTOR_SI
public static java.lang.String PAGE_SI
public static java.lang.String TIMESTAMP_SI
public static java.lang.String TEXT_SI
public static java.lang.String WIKI_SI
public static java.lang.String REDIRECT_SI
public static java.lang.String REDIRECT_FROM_SI
public static java.lang.String REDIRECT_TO_SI
private java.lang.String data_sitename
private java.lang.String data_base
private java.util.Vector<java.lang.String> data_namespaces
private java.lang.String data_namespace
private java.lang.String data_title
private java.lang.String data_pageid
private java.lang.String data_restrictions
private java.lang.String data_revisionid
private java.lang.String data_timestamp
private java.lang.String data_username
private java.lang.String data_contributorid
private java.util.HashSet<java.lang.String> data_contributors
private java.lang.String data_text
private int data_latestrevision
private java.lang.String data_latesttext
private java.lang.String data_latesttimestamp
public WikiParser(java.lang.String wikiUrl,
TopicMap tm,
MediaWikiExtractor parent)
private Topic getOrCreateTopic(java.lang.String si) throws TopicMapException
Throws: TopicMapException

private Topic getOrCreateTopic(java.lang.String si, java.lang.String bn) throws TopicMapException
Throws: TopicMapException

public void startDocument()
throws org.xml.sax.SAXException
Specified by: startDocument in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException

public void endDocument()
throws org.xml.sax.SAXException
Specified by: endDocument in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException

public void startElement(java.lang.String uri,
java.lang.String localName,
java.lang.String qName,
org.xml.sax.Attributes atts)
throws org.xml.sax.SAXException
Specified by: startElement in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException

public void endElement(java.lang.String uri,
java.lang.String localName,
java.lang.String qName)
throws org.xml.sax.SAXException
Specified by: endElement in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException

public void characters(char[] ch,
int start,
int length)
throws org.xml.sax.SAXException
Specified by: characters in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException

public void warning(org.xml.sax.SAXParseException exception)
throws org.xml.sax.SAXException
Specified by: warning in interface org.xml.sax.ErrorHandler
Throws: org.xml.sax.SAXException

public void error(org.xml.sax.SAXParseException exception)
throws org.xml.sax.SAXException
Specified by: error in interface org.xml.sax.ErrorHandler
Throws: org.xml.sax.SAXException

public void fatalError(org.xml.sax.SAXParseException exception)
throws org.xml.sax.SAXException
Specified by: fatalError in interface org.xml.sax.ErrorHandler
Throws: org.xml.sax.SAXException

public void ignorableWhitespace(char[] ch,
int start,
int length)
throws org.xml.sax.SAXException
Specified by: ignorableWhitespace in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException

public void processingInstruction(java.lang.String target,
java.lang.String data)
throws org.xml.sax.SAXException
Specified by: processingInstruction in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException

public void startPrefixMapping(java.lang.String prefix,
java.lang.String uri)
throws org.xml.sax.SAXException
Specified by: startPrefixMapping in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException

public void endPrefixMapping(java.lang.String prefix)
throws org.xml.sax.SAXException
Specified by: endPrefixMapping in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException

public void setDocumentLocator(org.xml.sax.Locator locator)
Specified by: setDocumentLocator in interface org.xml.sax.ContentHandler

public void skippedEntity(java.lang.String name)
throws org.xml.sax.SAXException
Specified by: skippedEntity in interface org.xml.sax.ContentHandler
Throws: org.xml.sax.SAXException

Copyright 2004-2015 Wandora Team