public class OCRExtractor extends AbstractExtractor
| Modifier and Type | Field and Description |
|---|---|
| private java.lang.String[] | contentTypes |
| protected java.lang.String | DATE_EXTRACTED_SI |
| protected java.lang.String | DATE_MODIFIED_SI |
| protected java.text.SimpleDateFormat | dateFormatter |
| protected java.lang.String | DOCUMENT_SI |
| protected java.lang.String | FILE_SIZE_SI |
| protected java.lang.String | SOURCE_SI |
| protected java.lang.String | TEMP_PATH |
| protected java.lang.String | TEXT_CONTENT_SI |
CUSTOM_EXTRACTOR, DONE_FAILED, DONE_MANY, DONE_ONE, EXACTLY_GIVEN_URLS, FILE_EXTRACTOR, FILE_PATTERN, GIVEN_URLS_AND_ALL_CRAWLED_DOCUMENTS, GIVEN_URLS_AND_CRAWLED_DOCUMENTS_IN_URL_DOMAIN, GIVEN_URLS_AND_LINKED_DOCUMENTS, GIVEN_URLS_AND_URL_BELOW, INFO_WAIT_WHILE_WORKING, LOG_TITLE, POINT_START_URL_TEXT, RAW_EXTRACTOR, SELECT_DIALOG_TITLE, STRING_EXTRACTOR_NOT_SUPPORTED_MESSAGE, URL_EXTRACTOR

CLOSE, EXECUTE, INVISIBLE, VISIBLE, WAIT

RETURN_ERROR, RETURN_INFO

| Constructor and Description |
|---|
| OCRExtractor() |
| Modifier and Type | Method and Description |
|---|---|
boolean |
_extractTopicsFrom(java.io.File f,
TopicMap t) |
boolean |
_extractTopicsFrom(java.lang.String str,
TopicMap t) |
boolean |
_extractTopicsFrom(java.net.URL u,
TopicMap t) |
boolean |
acceptBrowserExtractRequest(BrowserExtractRequest request,
Wandora wandora) |
Topic |
createDocumentTypeTopic(TopicMap tm) |
java.lang.String |
doBrowserExtract(BrowserExtractRequest request,
Wandora wandora) |
java.lang.String |
getBrowserExtractorName() |
Topic |
getContentType(TopicMap tm) |
java.lang.String[] |
getContentTypes()
Returns an array of String containing the content-types this
ContentHandler can process. |
Topic |
getDateModifiedType(TopicMap tm) |
java.lang.String |
getDescription()
AdminToolManager displays tool descriptions while the user browses available
tools and builds user-customizable GUI elements such as the Tools menu.
|
int |
getExtractorType() |
javax.swing.Icon |
getIcon()
Every tool may have an identifying graphic icon that is used within the tool's GUI elements.
|
java.lang.String |
getName()
The tool's name represents the tool in the UI unless the tool has
explicitly been given another GUI name.
|
protected Topic |
getOrCreateLangTopic(TopicMap tm,
java.lang.String lng6392) |
protected Topic |
getOrCreateTopic(TopicMap tm,
java.lang.String si) |
protected Topic |
getOrCreateTopic(TopicMap tm,
java.lang.String si,
java.lang.String bn) |
Topic |
getSizeType(TopicMap tm) |
Topic |
getSourceType(TopicMap tm) |
Topic |
getTimeExtractedType(TopicMap tm) |
Topic |
getWandoraClass(TopicMap tm) |
boolean |
isConfigurable()
Whether this tool is configurable.
|
protected void |
makeSubclassOf(TopicMap tm,
Topic t,
Topic superclass) |
private boolean |
processFile(java.io.File f,
TopicMap tm,
Topic documentTopic) |
boolean |
useURLCrawler() |
addCrawlerUrl, browserExtractorConsumesPlainText, buildSI, buildSL, clearMasterSubject, createAssociation, createAssociation, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, croppedFilename, croppedFilename, croppedUrlString, croppedUrlString, dropExtract, dropExtract, dropExtract, execute, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFromText, getCrawlerMode, getForceContent, getForceFiles, getForceUrls, getGUIText, getGUIText, getInterruptsHandled, getMasterSubject, getType, getWandora, handle, handleContent, handleCustomType, handleFiles, handleForcedContent, handleInterrupt, handleStringContent, handleUrls, initializeCustomType, instantDropHandle, makeSubclassOfWandoraClass, runInOwnThread, setData, setDisplayName, setForceContent, setForceFiles, setForceUrls, setMasterSubject, setMasterSubject, setTopicMap, setupCrawler, setWandora, takeNap, urlEncode, useTempTopicMap

addUndoMarker, addUndoMarker, allowMultipleInvocations, clearAllThreads, clearThreads, clearThreads, clearToolLock, clearToolLock, clearToolLocks, configure, execute, execute, forceStop, forceStop, getContext, getCurrentLogger, getDefaultLogger, getHistory, getLastLogger, getState, getThreads, getThreads, getToolMenuItem, getToolMenuItem, getTopicName, hlog, initialize, interruptAllThreads, interruptThreads, interruptThreads, isRunning, isRunning, lockLog, log, log, log, log, requiresRefresh, run, setContext, setDefaultLogger, setLogTitle, setProgress, setProgressMax, setState, setToolLogger, singleLog, singleLog, singleLog, solveContextTopicMap, solveNameForTopicMap, writeOptions

clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait

configure, execute, execute, getContext, getToolMenuItem, hlog, initialize, isRunning, log, log, log, log, requiresRefresh, setContext, setToolLogger, writeOptions

forceStop, getHistory, getState, lockLog, setLogTitle, setProgress, setProgressMax, setState

protected java.lang.String SOURCE_SI
protected java.lang.String DOCUMENT_SI
protected java.lang.String TEXT_CONTENT_SI
protected java.lang.String DATE_EXTRACTED_SI
protected java.lang.String DATE_MODIFIED_SI
protected java.lang.String FILE_SIZE_SI
protected java.lang.String TEMP_PATH
protected java.text.SimpleDateFormat dateFormatter
private final java.lang.String[] contentTypes
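public java.lang.String getName()
Description copied from class: AbstractWandoraTool
The tool's name represents the tool in the UI unless the tool has explicitly been given another GUI name.
Specified by: getName in interface WandoraTool
Overrides: getName in class AbstractExtractor

public java.lang.String getDescription()
Description copied from class: AbstractWandoraTool
AdminToolManager displays tool descriptions while the user browses available tools and builds user-customizable GUI elements such as the Tools menu.
Specified by: getDescription in interface WandoraTool
Overrides: getDescription in class AbstractExtractor

public javax.swing.Icon getIcon()
Description copied from class: AbstractWandoraTool
Every tool may have an identifying graphic icon that is used within the tool's GUI elements. getIcon should return the Icon object of the tool.
Specified by: getIcon in interface WandoraTool
Overrides: getIcon in class AbstractExtractor

public java.lang.String[] getContentTypes()
Description copied from interface: Handler
Returns an array of String containing the content-types this ContentHandler can process.
Specified by: getContentTypes in interface Handler
Overrides: getContentTypes in class AbstractExtractor

public boolean useURLCrawler()
Overrides: useURLCrawler in class AbstractExtractor

public int getExtractorType()
Overrides: getExtractorType in class AbstractExtractor

public boolean isConfigurable()
Description copied from class: AbstractWandoraTool
Whether this tool is configurable.
Specified by: isConfigurable in interface WandoraTool
Overrides: isConfigurable in class AbstractWandoraTool

public java.lang.String doBrowserExtract(BrowserExtractRequest request, Wandora wandora) throws TopicMapException
Specified by: doBrowserExtract in interface BrowserPluginExtractor
Overrides: doBrowserExtract in class AbstractExtractor
Throws: TopicMapException

public boolean acceptBrowserExtractRequest(BrowserExtractRequest request, Wandora wandora) throws TopicMapException
Specified by: acceptBrowserExtractRequest in interface BrowserPluginExtractor
Overrides: acceptBrowserExtractRequest in class AbstractExtractor
Throws: TopicMapException

public java.lang.String getBrowserExtractorName()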
Specified by: getBrowserExtractorName in interface BrowserPluginExtractor
Overrides: getBrowserExtractorName in class AbstractExtractor

public boolean _extractTopicsFrom(java.io.File f, TopicMap t) throws java.lang.Exception
Overrides: _extractTopicsFrom in class AbstractExtractor
Throws: java.lang.Exception

public boolean _extractTopicsFrom(java.net.URL u, TopicMap t) throws java.lang.Exception
Overrides: _extractTopicsFrom in class AbstractExtractor
Throws: java.lang.Exception

public boolean _extractTopicsFrom(java.lang.String str, TopicMap t) throws java.lang.Exception
Overrides: _extractTopicsFrom in class AbstractExtractor
Throws: java.lang.Exception

private boolean processFile(java.io.File f, TopicMap tm, Topic documentTopic) throws TopicMapException
Throws: TopicMapException

public Topic getContentType(TopicMap tm) throws TopicMapException
Throws: TopicMapException

public Topic getDateModifiedType(TopicMap tm) throws TopicMapException
Throws: TopicMapException

public Topic getTimeExtractedType(TopicMap tm) throws TopicMapException
Throws: TopicMapException

public Topic getSizeType(TopicMap tm) throws TopicMapException
Throws: TopicMapException

public Topic getSourceType(TopicMap tm) throws TopicMapException
Throws: TopicMapException

public Topic createDocumentTypeTopic(TopicMap tm) throws TopicMapException
Throws: TopicMapException

public Topic getWandoraClass(TopicMap tm) throws TopicMapException
Throws: TopicMapException

protected Topic getOrCreateTopic(TopicMap tm, java.lang.String si) throws TopicMapException
Throws: TopicMapException

protected Topic getOrCreateTopic(TopicMap tm, java.lang.String si, java.lang.String bn) throws TopicMapException
Throws: TopicMapException

protected void makeSubclassOf(TopicMap tm, Topic t, Topic superclass) throws TopicMapException
Throws: TopicMapException

protected Topic getOrCreateLangTopic(TopicMap tm, java.lang.String lng6392) throws TopicMapException
Throws: TopicMapException

Copyright 2004-2015 Wandora Team