abstract class AbstractWordExtractor extends AbstractExtractor
| Modifier and Type | Field and Description |
|---|---|
| private Topic | baseTopic |
| private WordConfiguration | config |
CUSTOM_EXTRACTOR, DONE_FAILED, DONE_MANY, DONE_ONE, EXACTLY_GIVEN_URLS, FILE_EXTRACTOR, FILE_PATTERN, GIVEN_URLS_AND_ALL_CRAWLED_DOCUMENTS, GIVEN_URLS_AND_CRAWLED_DOCUMENTS_IN_URL_DOMAIN, GIVEN_URLS_AND_LINKED_DOCUMENTS, GIVEN_URLS_AND_URL_BELOW, INFO_WAIT_WHILE_WORKING, LOG_TITLE, POINT_START_URL_TEXT, RAW_EXTRACTOR, SELECT_DIALOG_TITLE, STRING_EXTRACTOR_NOT_SUPPORTED_MESSAGE, URL_EXTRACTOR

CLOSE, EXECUTE, INVISIBLE, VISIBLE, WAIT

RETURN_ERROR, RETURN_INFO

| Constructor and Description |
|---|
| AbstractWordExtractor() |
| Modifier and Type | Method and Description |
|---|---|
| boolean | _extractTopicsFrom(java.io.File f, TopicMap t) |
| boolean | _extractTopicsFrom(java.lang.String str, TopicMap t) |
| boolean | _extractTopicsFrom(java.net.URL u, TopicMap t) |
| private void | associateWord(Topic w, Topic t, Topic s, TopicMap tm)<br>Associate the Topic word with a Topic t |
| private Topic | createScoreTopic(java.lang.Float get, TopicMap tm) |
| private Topic | createWordTopic(java.lang.String word, TopicMap tm)<br>Create a Topic representing word |
| protected abstract java.lang.Object | formNeedle(java.lang.String s) |
| protected abstract java.lang.String | getBNSuffix() |
| (package private) abstract WordConfiguration | getConfig() |
| protected abstract java.lang.String | getSIBase() |
| protected boolean | handleWordList(java.util.List<java.lang.String> words, TopicMap tm)<br>Associates each topic in current context with a word if the word is found in the topic instance data. |
| protected abstract float | isMatch(java.lang.Object needle, java.lang.String haystack) |
| private java.util.HashMap<Topic,java.lang.Float> | solveTopics(java.lang.String word, TopicMap tm)<br>Find topics with content matching 'word' according to given configuration |
acceptBrowserExtractRequest, addCrawlerUrl, browserExtractorConsumesPlainText, buildSI, buildSL, clearMasterSubject, createAssociation, createAssociation, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, croppedFilename, croppedFilename, croppedUrlString, croppedUrlString, doBrowserExtract, dropExtract, dropExtract, dropExtract, execute, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFromText, getBrowserExtractorName, getContentTypes, getCrawlerMode, getDescription, getExtractorType, getForceContent, getForceFiles, getForceUrls, getGUIText, getGUIText, getIcon, getInterruptsHandled, getMasterSubject, getName, getType, getWandora, handle, handleContent, handleCustomType, handleFiles, handleForcedContent, handleInterrupt, handleStringContent, handleUrls, initializeCustomType, instantDropHandle, makeSubclassOfWandoraClass, runInOwnThread, setData, setDisplayName, setForceContent, setForceFiles, setForceUrls, setMasterSubject, setMasterSubject, setTopicMap, setupCrawler, setWandora, takeNap, urlEncode, useTempTopicMap, useURLCrawler

addUndoMarker, addUndoMarker, allowMultipleInvocations, clearAllThreads, clearThreads, clearThreads, clearToolLock, clearToolLock, clearToolLocks, configure, execute, execute, forceStop, forceStop, getContext, getCurrentLogger, getDefaultLogger, getHistory, getLastLogger, getState, getThreads, getThreads, getToolMenuItem, getToolMenuItem, getTopicName, hlog, initialize, interruptAllThreads, interruptThreads, interruptThreads, isConfigurable, isRunning, isRunning, lockLog, log, log, log, log, requiresRefresh, run, setContext, setDefaultLogger, setLogTitle, setProgress, setProgressMax, setState, setToolLogger, singleLog, singleLog, singleLog, solveContextTopicMap, solveNameForTopicMap, writeOptions

clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait

configure, execute, execute, getContext, getToolMenuItem, hlog, initialize, isConfigurable, isRunning, log, log, log, log, requiresRefresh, setContext, setToolLogger, writeOptions

forceStop, getHistory, getState, lockLog, setLogTitle, setProgress, setProgressMax, setState

private Topic baseTopic
private WordConfiguration config
public boolean _extractTopicsFrom(java.io.File f,
TopicMap t)
throws java.lang.Exception
_extractTopicsFrom in class AbstractExtractor
Throws: java.lang.Exception

public boolean _extractTopicsFrom(java.net.URL u,
TopicMap t)
throws java.lang.Exception
_extractTopicsFrom in class AbstractExtractor
Throws: java.lang.Exception

public boolean _extractTopicsFrom(java.lang.String str,
TopicMap t)
throws java.lang.Exception
_extractTopicsFrom in class AbstractExtractor
Throws: java.lang.Exception

abstract WordConfiguration getConfig()
protected boolean handleWordList(java.util.List<java.lang.String> words,
TopicMap tm)
throws TopicMapException
words - a list of words to look for in instance data
tm -
Throws: TopicMapException

private java.util.HashMap<Topic,java.lang.Float> solveTopics(java.lang.String word, TopicMap tm)
word -
tm -

private Topic createWordTopic(java.lang.String word, TopicMap tm) throws TopicMapException
word -
tm -
Throws: TopicMapException - if topic creation fails

private Topic createScoreTopic(java.lang.Float get, TopicMap tm) throws TopicMapException
Throws: TopicMapException

private void associateWord(Topic w, Topic t, Topic s, TopicMap tm) throws TopicMapException
w -
t -
tm -
Throws: TopicMapException - if creating the association fails

protected abstract java.lang.Object formNeedle(java.lang.String s)
protected abstract java.lang.String getBNSuffix()
protected abstract java.lang.String getSIBase()
protected abstract float isMatch(java.lang.Object needle,
java.lang.String haystack)
Copyright 2004-2015 Wandora Team