public abstract class AbstractJsoupExtractor extends AbstractExtractor
| Modifier and Type | Field and Description |
|---|---|
private java.lang.String[] |
contentTypes |
private static java.lang.String |
LANG_SI |
CUSTOM_EXTRACTOR, DONE_FAILED, DONE_MANY, DONE_ONE, EXACTLY_GIVEN_URLS, FILE_EXTRACTOR, FILE_PATTERN, GIVEN_URLS_AND_ALL_CRAWLED_DOCUMENTS, GIVEN_URLS_AND_CRAWLED_DOCUMENTS_IN_URL_DOMAIN, GIVEN_URLS_AND_LINKED_DOCUMENTS, GIVEN_URLS_AND_URL_BELOW, INFO_WAIT_WHILE_WORKING, LOG_TITLE, POINT_START_URL_TEXT, RAW_EXTRACTOR, SELECT_DIALOG_TITLE, STRING_EXTRACTOR_NOT_SUPPORTED_MESSAGE, URL_EXTRACTORCLOSE, EXECUTE, INVISIBLE, VISIBLE, WAITRETURN_ERROR, RETURN_INFO| Constructor and Description |
|---|
AbstractJsoupExtractor() |
| Modifier and Type | Method and Description |
|---|---|
boolean |
_extractTopicsFrom(java.io.File f,
TopicMap t) |
boolean |
_extractTopicsFrom(java.lang.String str,
TopicMap t) |
boolean |
_extractTopicsFrom(java.net.URL u,
TopicMap t) |
abstract boolean |
extractTopicsFrom(org.jsoup.nodes.Document d,
java.lang.String u,
TopicMap t) |
java.lang.String[] |
getContentTypes()
Returns an array of String containing the content-types this
ContentHandler can process. |
javax.swing.Icon |
getIcon()
All tools may have identifying graphic icon used within tool GUI elements.
|
protected static Topic |
getLangTopic(TopicMap tm) |
protected static Topic |
getOrCreateTopic(TopicMap tm,
java.lang.String si) |
protected static Topic |
getOrCreateTopic(TopicMap tm,
java.lang.String si,
java.lang.String bn) |
protected static Topic |
getWandoraClassTopic(TopicMap tm) |
protected static void |
makeSubclassOf(TopicMap tm,
Topic t,
Topic superclass) |
acceptBrowserExtractRequest, addCrawlerUrl, browserExtractorConsumesPlainText, buildSI, buildSL, clearMasterSubject, createAssociation, createAssociation, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, croppedFilename, croppedFilename, croppedUrlString, croppedUrlString, doBrowserExtract, dropExtract, dropExtract, dropExtract, execute, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFromText, getBrowserExtractorName, getCrawlerMode, getDescription, getExtractorType, getForceContent, getForceFiles, getForceUrls, getGUIText, getGUIText, getInterruptsHandled, getMasterSubject, getName, getType, getWandora, handle, handleContent, handleCustomType, handleFiles, handleForcedContent, handleInterrupt, handleStringContent, handleUrls, initializeCustomType, instantDropHandle, makeSubclassOfWandoraClass, runInOwnThread, setData, setDisplayName, setForceContent, setForceFiles, setForceUrls, setMasterSubject, setMasterSubject, setTopicMap, setupCrawler, setWandora, takeNap, urlEncode, useTempTopicMap, useURLCrawleraddUndoMarker, addUndoMarker, allowMultipleInvocations, clearAllThreads, clearThreads, clearThreads, clearToolLock, clearToolLock, clearToolLocks, configure, execute, execute, forceStop, forceStop, getContext, getCurrentLogger, getDefaultLogger, getHistory, getLastLogger, getState, getThreads, getThreads, getToolMenuItem, getToolMenuItem, getTopicName, hlog, initialize, interruptAllThreads, interruptThreads, interruptThreads, isConfigurable, isRunning, isRunning, lockLog, log, log, log, log, requiresRefresh, run, setContext, setDefaultLogger, setLogTitle, setProgress, setProgressMax, setState, setToolLogger, singleLog, singleLog, singleLog, solveContextTopicMap, solveNameForTopicMap, writeOptionsclone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, waitconfigure, execute, execute, getContext, getToolMenuItem, hlog, initialize, isConfigurable, isRunning, log, log, log, log, requiresRefresh, setContext, setToolLogger, writeOptionsforceStop, getHistory, getState, lockLog, setLogTitle, setProgress, setProgressMax, setStateprivate static final java.lang.String LANG_SI
private final java.lang.String[] contentTypes
public javax.swing.Icon getIcon()
AbstractWandoraToolgetIcon should return Icon object of
the tool.getIcon in interface WandoraToolgetIcon in class AbstractExtractorpublic java.lang.String[] getContentTypes()
HandlerContentHandler can process.getContentTypes in interface HandlergetContentTypes in class AbstractExtractorpublic boolean _extractTopicsFrom(java.io.File f,
TopicMap t)
throws java.lang.Exception
_extractTopicsFrom in class AbstractExtractorjava.lang.Exceptionpublic boolean _extractTopicsFrom(java.net.URL u,
TopicMap t)
throws java.lang.Exception
_extractTopicsFrom in class AbstractExtractorjava.lang.Exceptionpublic boolean _extractTopicsFrom(java.lang.String str,
TopicMap t)
throws java.lang.Exception
_extractTopicsFrom in class AbstractExtractorjava.lang.Exceptionprotected static Topic getWandoraClassTopic(TopicMap tm) throws TopicMapException
TopicMapExceptionprotected static Topic getOrCreateTopic(TopicMap tm, java.lang.String si) throws TopicMapException
TopicMapExceptionprotected static Topic getOrCreateTopic(TopicMap tm, java.lang.String si, java.lang.String bn) throws TopicMapException
TopicMapExceptionprotected static void makeSubclassOf(TopicMap tm, Topic t, Topic superclass) throws TopicMapException
TopicMapExceptionprotected static Topic getLangTopic(TopicMap tm) throws TopicMapException
TopicMapExceptionpublic abstract boolean extractTopicsFrom(org.jsoup.nodes.Document d,
java.lang.String u,
TopicMap t)
throws java.lang.Exception
java.lang.ExceptionCopyright 2004-2015 Wandora Team