public class GuardianContentSearchExtractor extends AbstractGuardianExtractor
| Modifier and Type | Field and Description |
|---|---|
| private static java.lang.String | currentURL |
| private static java.lang.String | defaultLang |
| private java.lang.String | defaultPagingOption |
| private boolean | shouldHandlePagination |
API_URL_SI, CONTENT_SI, DATE_SI, FIELD_BASE_SI, GUARDIAN_BASE_SI, ID_SI, LANG_SI, PUBLICATION_TIME_SI, SECTION_ID_SI, SECTION_NAME_SI, TAG_BASE_SI, TITLE_SI, WEB_URL_SI
CUSTOM_EXTRACTOR, DONE_FAILED, DONE_MANY, DONE_ONE, EXACTLY_GIVEN_URLS, FILE_EXTRACTOR, FILE_PATTERN, GIVEN_URLS_AND_ALL_CRAWLED_DOCUMENTS, GIVEN_URLS_AND_CRAWLED_DOCUMENTS_IN_URL_DOMAIN, GIVEN_URLS_AND_LINKED_DOCUMENTS, GIVEN_URLS_AND_URL_BELOW, INFO_WAIT_WHILE_WORKING, LOG_TITLE, POINT_START_URL_TEXT, RAW_EXTRACTOR, SELECT_DIALOG_TITLE, STRING_EXTRACTOR_NOT_SUPPORTED_MESSAGE, URL_EXTRACTOR
CLOSE, EXECUTE, INVISIBLE, VISIBLE, WAIT
RETURN_ERROR, RETURN_INFO

| Constructor and Description |
|---|
| GuardianContentSearchExtractor() |
| Modifier and Type | Method and Description |
|---|---|
boolean |
_extractTopicsFrom(java.io.File f,
TopicMap tm) |
boolean |
_extractTopicsFrom(java.lang.String str,
TopicMap tm) |
boolean |
_extractTopicsFrom(java.net.URL u,
TopicMap tm) |
java.lang.String |
getDescription()
AdminToolManager views tool descriptions while the user browses available
tools and builds user-customizable GUI elements such as the Tools menu.
|
java.lang.String |
getName()
The tool's name represents the tool in the UI unless the tool has been
explicitly given another GUI name.
|
private void |
handlePagination(org.json.JSONObject json,
TopicMap tm) |
private void |
nap() |
void |
parse(org.json.JSONObject json,
TopicMap tm) |
private void |
parseFieldAssociation(org.json.JSONObject result,
java.lang.String jsonObjectName,
TopicMap tm,
Topic ct,
Topic ty) |
private void |
parseOccurrence(org.json.JSONObject result,
java.lang.String jsonObjectName,
TopicMap tm,
Topic t,
Topic ty) |
void |
parseResult(org.json.JSONObject result,
TopicMap tm) |
private void |
parseTagAssociation(org.json.JSONObject result,
java.lang.String jsonObjectName,
TopicMap tm,
Topic ct,
Topic cty) |
getContentType, getContentTypes, getDateTopic, getDateType, getFieldTopic, getFieldTopicType, getFieldType, getGuardianType, getIcon, getIDType, getLangTopic, getOrCreateTopic, getOrCreateTopic, getPubTimeType, getTagTopic, getTagTopicType, getTagType, getTitleType, getWandoraClassTopic, makeSubclassOf, runInOwnThread, useURLCrawler
acceptBrowserExtractRequest, addCrawlerUrl, browserExtractorConsumesPlainText, buildSI, buildSL, clearMasterSubject, createAssociation, createAssociation, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, croppedFilename, croppedFilename, croppedUrlString, croppedUrlString, doBrowserExtract, dropExtract, dropExtract, dropExtract, execute, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFromText, getBrowserExtractorName, getCrawlerMode, getExtractorType, getForceContent, getForceFiles, getForceUrls, getGUIText, getGUIText, getInterruptsHandled, getMasterSubject, getType, getWandora, handle, handleContent, handleCustomType, handleFiles, handleForcedContent, handleInterrupt, handleStringContent, handleUrls, initializeCustomType, instantDropHandle, makeSubclassOfWandoraClass, setData, setDisplayName, setForceContent, setForceFiles, setForceUrls, setMasterSubject, setMasterSubject, setTopicMap, setupCrawler, setWandora, takeNap, urlEncode, useTempTopicMap
addUndoMarker, addUndoMarker, allowMultipleInvocations, clearAllThreads, clearThreads, clearThreads, clearToolLock, clearToolLock, clearToolLocks, configure, execute, execute, forceStop, forceStop, getContext, getCurrentLogger, getDefaultLogger, getHistory, getLastLogger, getState, getThreads, getThreads, getToolMenuItem, getToolMenuItem, getTopicName, hlog, initialize, interruptAllThreads, interruptThreads, interruptThreads, isConfigurable, isRunning, isRunning, lockLog, log, log, log, log, requiresRefresh, run, setContext, setDefaultLogger, setLogTitle, setProgress, setProgressMax, setState, setToolLogger, singleLog, singleLog, singleLog, solveContextTopicMap, solveNameForTopicMap, writeOptions
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
configure, execute, execute, getContext, getToolMenuItem, hlog, initialize, isConfigurable, isRunning, log, log, log, log, requiresRefresh, setContext, setToolLogger, writeOptions
forceStop, getHistory, getState, lockLog, setLogTitle, setProgress, setProgressMax, setState

private static java.lang.String defaultLang
private static java.lang.String currentURL
private boolean shouldHandlePagination
private java.lang.String defaultPagingOption
public java.lang.String getName()
AbstractWandoraTool
getName in interface WandoraTool
getName in class AbstractGuardianExtractor

public java.lang.String getDescription()
AbstractWandoraTool
getDescription in interface WandoraTool
getDescription in class AbstractGuardianExtractor

public boolean _extractTopicsFrom(java.io.File f,
TopicMap tm)
throws java.lang.Exception
_extractTopicsFrom in class AbstractExtractor
java.lang.Exception

public boolean _extractTopicsFrom(java.net.URL u,
TopicMap tm)
throws java.lang.Exception
_extractTopicsFrom in class AbstractExtractor
java.lang.Exception

public boolean _extractTopicsFrom(java.lang.String str,
TopicMap tm)
throws java.lang.Exception
_extractTopicsFrom in class AbstractExtractor
java.lang.Exception

public void parse(org.json.JSONObject json,
TopicMap tm)
throws TopicMapException
TopicMapException

private void handlePagination(org.json.JSONObject json,
TopicMap tm)
private void nap()
public void parseResult(org.json.JSONObject result,
TopicMap tm)
throws org.json.JSONException,
TopicMapException
org.json.JSONException
TopicMapException

private void parseOccurrence(org.json.JSONObject result,
java.lang.String jsonObjectName,
TopicMap tm,
Topic t,
Topic ty)
private void parseFieldAssociation(org.json.JSONObject result,
java.lang.String jsonObjectName,
TopicMap tm,
Topic ct,
Topic ty)
Copyright 2004-2015 Wandora Team