public class SimpleFileExtractor extends AbstractExtractor implements WandoraTool, BrowserPluginExtractor
| Modifier and Type | Field and Description |
|---|---|
private Wandora |
admin |
static java.lang.String[] |
contentTypes |
protected static java.lang.String |
DEFAULT_DATE_FORMAT |
private java.lang.String |
defaultLang |
protected static java.lang.String |
DOCUMENT_SI |
protected static java.lang.String |
SOURCE_SI |
protected static java.lang.String |
TOPIC_SI |
private java.util.ArrayList<java.lang.String> |
visitedDirectories |
CUSTOM_EXTRACTOR, DONE_FAILED, DONE_MANY, DONE_ONE, EXACTLY_GIVEN_URLS, FILE_EXTRACTOR, FILE_PATTERN, GIVEN_URLS_AND_ALL_CRAWLED_DOCUMENTS, GIVEN_URLS_AND_CRAWLED_DOCUMENTS_IN_URL_DOMAIN, GIVEN_URLS_AND_LINKED_DOCUMENTS, GIVEN_URLS_AND_URL_BELOW, INFO_WAIT_WHILE_WORKING, LOG_TITLE, POINT_START_URL_TEXT, RAW_EXTRACTOR, SELECT_DIALOG_TITLE, STRING_EXTRACTOR_NOT_SUPPORTED_MESSAGE, URL_EXTRACTOR
CLOSE, EXECUTE, INVISIBLE, VISIBLE, WAIT
RETURN_ERROR, RETURN_INFO

| Constructor and Description |
|---|
SimpleFileExtractor()
Creates a new instance of SimpleFileExtractor
|
| Modifier and Type | Method and Description |
|---|---|
boolean |
_extractTopicsFrom(java.io.File file,
TopicMap topicMap) |
boolean |
_extractTopicsFrom(java.lang.String str,
TopicMap topicMap) |
boolean |
_extractTopicsFrom(java.net.URL url,
TopicMap topicMap) |
boolean |
browserExtractorConsumesPlainText() |
java.lang.String |
doBrowserExtract(BrowserExtractRequest request,
Wandora wandora) |
void |
execute(Wandora wandora,
Context context)
Runs the tool.
|
java.lang.String[] |
getContentTypes()
Returns an array of String containing the content-types this
ContentHandler can process. |
java.lang.String |
getDescription()
AdminToolManager views tool descriptions while the user browses available
tools and builds user-customizable GUI elements such as the Tools menu.
|
Topic |
getDocumentType(TopicMap tm) |
java.lang.String |
getGUIText(int textType) |
javax.swing.Icon |
getIcon()
All tools may have identifying graphic icon used within tool GUI elements.
|
java.lang.String |
getName()
The tool's name represents the tool in the UI unless the tool has been
explicitly given another GUI name.
|
protected Topic |
getOrCreateTopic(TopicMap tm,
java.lang.String si) |
protected Topic |
getOrCreateTopic(TopicMap tm,
java.lang.String si,
java.lang.String bn) |
Topic |
getSourceType(TopicMap tm) |
Topic |
getTopicType(TopicMap tm) |
Topic |
getWandoraClass(TopicMap tm) |
protected void |
makeSubclassOf(TopicMap tm,
Topic t,
Topic superclass) |
java.lang.String |
solveTitle(java.lang.String content) |
boolean |
useTempTopicMap() |
boolean |
useURLCrawler() |
acceptBrowserExtractRequest, addCrawlerUrl, buildSI, buildSL, clearMasterSubject, createAssociation, createAssociation, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, croppedFilename, croppedFilename, croppedUrlString, croppedUrlString, dropExtract, dropExtract, dropExtract, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFromText, getBrowserExtractorName, getCrawlerMode, getExtractorType, getForceContent, getForceFiles, getForceUrls, getGUIText, getInterruptsHandled, getMasterSubject, getType, getWandora, handle, handleContent, handleCustomType, handleFiles, handleForcedContent, handleInterrupt, handleStringContent, handleUrls, initializeCustomType, instantDropHandle, makeSubclassOfWandoraClass, runInOwnThread, setData, setDisplayName, setForceContent, setForceFiles, setForceUrls, setMasterSubject, setMasterSubject, setTopicMap, setupCrawler, setWandora, takeNap, urlEncode
addUndoMarker, addUndoMarker, allowMultipleInvocations, clearAllThreads, clearThreads, clearThreads, clearToolLock, clearToolLock, clearToolLocks, configure, execute, execute, forceStop, forceStop, getContext, getCurrentLogger, getDefaultLogger, getHistory, getLastLogger, getState, getThreads, getThreads, getToolMenuItem, getToolMenuItem, getTopicName, hlog, initialize, interruptAllThreads, interruptThreads, interruptThreads, isConfigurable, isRunning, isRunning, lockLog, log, log, log, log, requiresRefresh, run, setContext, setDefaultLogger, setLogTitle, setProgress, setProgressMax, setState, setToolLogger, singleLog, singleLog, singleLog, solveContextTopicMap, solveNameForTopicMap, writeOptions
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
configure, execute, execute, getContext, getToolMenuItem, getType, hlog, initialize, isConfigurable, isRunning, log, log, log, log, requiresRefresh, setContext, setToolLogger, writeOptions
forceStop, getHistory, getState, lockLog, setLogTitle, setProgress, setProgressMax, setState
acceptBrowserExtractRequest, getBrowserExtractorName

protected static java.lang.String TOPIC_SI
protected static java.lang.String SOURCE_SI
protected static java.lang.String DOCUMENT_SI
protected static java.lang.String DEFAULT_DATE_FORMAT
private java.lang.String defaultLang
private Wandora admin
private java.util.ArrayList<java.lang.String> visitedDirectories
public static final java.lang.String[] contentTypes
public SimpleFileExtractor()
public java.lang.String getName()
AbstractWandoraTool
getName in interface WandoraTool
getName in class AbstractExtractor

public java.lang.String getDescription()
AbstractWandoraTool
getDescription in interface WandoraTool
getDescription in class AbstractExtractor

public javax.swing.Icon getIcon()
AbstractWandoraTool
getIcon should return Icon object of the tool.
getIcon in interface WandoraTool
getIcon in class AbstractExtractor

public boolean useTempTopicMap()
useTempTopicMap in class AbstractExtractor

public boolean useURLCrawler()
useURLCrawler in class AbstractExtractor

public java.lang.String getGUIText(int textType)
getGUIText in class AbstractExtractor

public java.lang.String doBrowserExtract(BrowserExtractRequest request, Wandora wandora) throws TopicMapException
doBrowserExtract in interface BrowserPluginExtractor
doBrowserExtract in class AbstractExtractor
TopicMapException

public boolean browserExtractorConsumesPlainText()
browserExtractorConsumesPlainText in class AbstractExtractor

public void execute(Wandora wandora, Context context)
WandoraTool
execute in interface WandoraTool
execute in class AbstractExtractor

public boolean _extractTopicsFrom(java.lang.String str,
TopicMap topicMap)
throws java.lang.Exception
_extractTopicsFrom in class AbstractExtractor
java.lang.Exception

public boolean _extractTopicsFrom(java.net.URL url,
TopicMap topicMap)
throws java.lang.Exception
_extractTopicsFrom in class AbstractExtractor
java.lang.Exception

public boolean _extractTopicsFrom(java.io.File file,
TopicMap topicMap)
throws java.lang.Exception
_extractTopicsFrom in class AbstractExtractor
java.lang.Exception

public java.lang.String solveTitle(java.lang.String content)
public Topic getWandoraClass(TopicMap tm) throws TopicMapException
TopicMapException

public Topic getTopicType(TopicMap tm) throws TopicMapException
TopicMapException

public Topic getSourceType(TopicMap tm) throws TopicMapException
TopicMapException

public Topic getDocumentType(TopicMap tm) throws TopicMapException
TopicMapException

protected Topic getOrCreateTopic(TopicMap tm, java.lang.String si) throws TopicMapException
TopicMapException

protected Topic getOrCreateTopic(TopicMap tm, java.lang.String si, java.lang.String bn) throws TopicMapException
TopicMapException

protected void makeSubclassOf(TopicMap tm, Topic t, Topic superclass) throws TopicMapException
TopicMapException

public java.lang.String[] getContentTypes()
Handler
Returns an array of String containing the content-types this ContentHandler can process.
getContentTypes in interface Handler
getContentTypes in class AbstractExtractor

Copyright 2004-2015 Wandora Team