public class JsoupDefinitionListExtractor extends AbstractJsoupExtractor implements WandoraTool, BrowserPluginExtractor
Modifier and Type | Field and Description |
---|---|
static java.lang.String | CHILD_SI |
private Topic | childType |
static java.lang.String | DEF_SI |
private Topic | definitionType |
static java.lang.String | DOCUMENT_SI |
private Topic | documentTopic |
private Topic | documentType |
private Topic | langTopic |
static java.lang.String | LIST_SI |
private Topic | listType |
static java.lang.String | NAME_SI |
static java.lang.String | PARENT_SI |
private Topic | parentType |
static java.lang.String | RELATION_SI |
private Topic | relationType |
static java.lang.String | SI_PREFIX |
private TopicMap | tm |
private Topic | wandoraClass |
CUSTOM_EXTRACTOR, DONE_FAILED, DONE_MANY, DONE_ONE, EXACTLY_GIVEN_URLS, FILE_EXTRACTOR, FILE_PATTERN, GIVEN_URLS_AND_ALL_CRAWLED_DOCUMENTS, GIVEN_URLS_AND_CRAWLED_DOCUMENTS_IN_URL_DOMAIN, GIVEN_URLS_AND_LINKED_DOCUMENTS, GIVEN_URLS_AND_URL_BELOW, INFO_WAIT_WHILE_WORKING, LOG_TITLE, POINT_START_URL_TEXT, RAW_EXTRACTOR, SELECT_DIALOG_TITLE, STRING_EXTRACTOR_NOT_SUPPORTED_MESSAGE, URL_EXTRACTOR
CLOSE, EXECUTE, INVISIBLE, VISIBLE, WAIT
RETURN_ERROR, RETURN_INFO
Constructor and Description |
---|
JsoupDefinitionListExtractor() |
Modifier and Type | Method and Description |
---|---|
(package private) void | declareChild(Topic parent, Topic child) |
boolean | extractTopicsFrom(org.jsoup.nodes.Document d, java.lang.String u, TopicMap t) |
private void | parseList(org.jsoup.nodes.Element list, Topic documentTopic) |
private void | parseName(org.jsoup.nodes.Element name, Topic listTopic) |
_extractTopicsFrom, _extractTopicsFrom, _extractTopicsFrom, getContentTypes, getIcon, getLangTopic, getOrCreateTopic, getOrCreateTopic, getWandoraClassTopic, makeSubclassOf
acceptBrowserExtractRequest, addCrawlerUrl, browserExtractorConsumesPlainText, buildSI, buildSL, clearMasterSubject, createAssociation, createAssociation, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, croppedFilename, croppedFilename, croppedUrlString, croppedUrlString, doBrowserExtract, dropExtract, dropExtract, dropExtract, execute, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFromText, getBrowserExtractorName, getCrawlerMode, getDescription, getExtractorType, getForceContent, getForceFiles, getForceUrls, getGUIText, getGUIText, getInterruptsHandled, getMasterSubject, getName, getType, getWandora, handle, handleContent, handleCustomType, handleFiles, handleForcedContent, handleInterrupt, handleStringContent, handleUrls, initializeCustomType, instantDropHandle, makeSubclassOfWandoraClass, runInOwnThread, setData, setDisplayName, setForceContent, setForceFiles, setForceUrls, setMasterSubject, setMasterSubject, setTopicMap, setupCrawler, setWandora, takeNap, urlEncode, useTempTopicMap, useURLCrawler
addUndoMarker, addUndoMarker, allowMultipleInvocations, clearAllThreads, clearThreads, clearThreads, clearToolLock, clearToolLock, clearToolLocks, configure, execute, execute, forceStop, forceStop, getContext, getCurrentLogger, getDefaultLogger, getHistory, getLastLogger, getState, getThreads, getThreads, getToolMenuItem, getToolMenuItem, getTopicName, hlog, initialize, interruptAllThreads, interruptThreads, interruptThreads, isConfigurable, isRunning, isRunning, lockLog, log, log, log, log, requiresRefresh, run, setContext, setDefaultLogger, setLogTitle, setProgress, setProgressMax, setState, setToolLogger, singleLog, singleLog, singleLog, solveContextTopicMap, solveNameForTopicMap, writeOptions
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
configure, execute, execute, execute, getContext, getDescription, getIcon, getName, getToolMenuItem, getType, hlog, initialize, isConfigurable, isRunning, log, log, log, log, requiresRefresh, setContext, setToolLogger, writeOptions
forceStop, getHistory, getState, lockLog, setLogTitle, setProgress, setProgressMax, setState
acceptBrowserExtractRequest, doBrowserExtract, getBrowserExtractorName
public static final java.lang.String SI_PREFIX
public static final java.lang.String LIST_SI
public static final java.lang.String DOCUMENT_SI
public static final java.lang.String NAME_SI
public static final java.lang.String DEF_SI
public static final java.lang.String CHILD_SI
public static final java.lang.String PARENT_SI
public static final java.lang.String RELATION_SI
private TopicMap tm
private Topic wandoraClass
private Topic documentTopic
private Topic langTopic
private Topic documentType
private Topic listType
private Topic definitionType
private Topic childType
private Topic parentType
private Topic relationType
public boolean extractTopicsFrom(org.jsoup.nodes.Document d, java.lang.String u, TopicMap t) throws java.lang.Exception
Overrides or implements: extractTopicsFrom in class AbstractJsoupExtractor
Throws: java.lang.Exception
private void parseList(org.jsoup.nodes.Element list, Topic documentTopic) throws TopicMapException
Throws: TopicMapException
private void parseName(org.jsoup.nodes.Element name, Topic listTopic) throws TopicMapException
Throws: TopicMapException
void declareChild(Topic parent, Topic child) throws TopicMapException
Throws: TopicMapException
Copyright 2004-2015 Wandora Team