public class MillionFirstStepsBookTSVExtractor extends AbstractMillionFirstStepsExtractor
ARKID_SI, AUTHOR_SI, BASE_SI, BL_DLS_SI, BOOK_SI, BRITISH_LIBRARY_SI, CORPORATE_SI, DATE_SI, DATEFIELD_SI, defaultEncoding, defaultLang, EDITION_SI, IMAGE_SI, IMAGEIDX_SI, ISSUANCE_SI, LANG_SI, ORDER_SI, PAGE_SI, PDF_SI, PLACE_SI, PUBLISHER_SI, ROLE_SI, SHELFMARK_SI, TITLE_SI, VOLUME_SI
CUSTOM_EXTRACTOR, DONE_FAILED, DONE_MANY, DONE_ONE, EXACTLY_GIVEN_URLS, FILE_EXTRACTOR, FILE_PATTERN, GIVEN_URLS_AND_ALL_CRAWLED_DOCUMENTS, GIVEN_URLS_AND_CRAWLED_DOCUMENTS_IN_URL_DOMAIN, GIVEN_URLS_AND_LINKED_DOCUMENTS, GIVEN_URLS_AND_URL_BELOW, INFO_WAIT_WHILE_WORKING, LOG_TITLE, POINT_START_URL_TEXT, RAW_EXTRACTOR, SELECT_DIALOG_TITLE, STRING_EXTRACTOR_NOT_SUPPORTED_MESSAGE, URL_EXTRACTOR
CLOSE, EXECUTE, INVISIBLE, VISIBLE, WAIT
RETURN_ERROR, RETURN_INFO
| Constructor and Description |
|---|
MillionFirstStepsBookTSVExtractor() |
| Modifier and Type | Method and Description |
|---|---|
void |
_extractTopicsFrom(java.io.File[] f) |
boolean |
_extractTopicsFrom(java.io.File f,
TopicMap tm) |
boolean |
_extractTopicsFrom(java.lang.String str,
TopicMap tm) |
boolean |
_extractTopicsFrom(java.net.URL u,
TopicMap tm) |
java.lang.String |
getDescription()
AdminToolManager displays tool descriptions while the user browses available
tools and builds user-customizable GUI elements such as the Tools menu.
|
private java.lang.String |
getIndex(java.lang.String indexName,
java.lang.String[] array,
java.util.HashMap<java.lang.String,java.lang.Integer> columnIndexes) |
java.lang.String |
getName()
The tool's name represents the tool in the UI unless the tool has been
explicitly given another GUI name.
|
void |
handleFiles(java.io.File[] files,
TopicMap tm) |
private boolean |
isValid(java.lang.String data) |
void |
parse(java.lang.String str,
TopicMap tm) |
void |
parseColumnNames(java.lang.String columns,
java.util.HashMap<java.lang.String,java.lang.Integer> columnIndexes) |
void |
parseLine(java.lang.String str,
java.util.HashMap<java.lang.String,java.lang.Integer> columnIndexes,
TopicMap tm) |
doUrl, getArkIdTypeTopic, getATopic, getATypeTopic, getAuthorTopic, getAuthorTypeTopic, getBLDLSIdTypeTopic, getBookTopic, getBookTypeTopic, getBritishLibraryTypeTopic, getCorporateTopic, getCorporateTypeTopic, getDatefieldTopic, getDatefieldTypeTopic, getDateTopic, getDateTypeTopic, getEditionTopic, getEditionTypeTopic, getIcon, getImageIdxTypeTopic, getImageTopic, getImageTopic, getImageTypeTopic, getIssuanceTopic, getIssuanceTypeTopic, getLangTopic, getOrCreateTopic, getOrCreateTopic, getOrderTopic, getOrderTypeTopic, getPageTopic, getPageTypeTopic, getPDFTypeTopic, getPlaceTopic, getPlaceTypeTopic, getPublisherTopic, getPublisherTypeTopic, getRoleTopic, getRoleTypeTopic, getShelfmarkTopic, getShelfmarkTypeTopic, getTitleTopic, getTitleTypeTopic, getVolumeTopic, getVolumeTypeTopic, getWandoraClassTopic, makeSubclassOf, runInOwnThread, useTempTopicMap, useURLCrawleracceptBrowserExtractRequest, addCrawlerUrl, browserExtractorConsumesPlainText, buildSI, buildSL, clearMasterSubject, createAssociation, createAssociation, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, croppedFilename, croppedFilename, croppedUrlString, croppedUrlString, doBrowserExtract, dropExtract, dropExtract, dropExtract, execute, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFromText, getBrowserExtractorName, getContentTypes, getCrawlerMode, getExtractorType, getForceContent, getForceFiles, getForceUrls, getGUIText, getGUIText, getInterruptsHandled, getMasterSubject, getType, getWandora, handle, handleContent, handleCustomType, handleForcedContent, handleInterrupt, handleStringContent, handleUrls, initializeCustomType, instantDropHandle, makeSubclassOfWandoraClass, setData, setDisplayName, setForceContent, setForceFiles, setForceUrls, setMasterSubject, setMasterSubject, setTopicMap, setupCrawler, setWandora, takeNap, urlEncodeaddUndoMarker, addUndoMarker, allowMultipleInvocations, clearAllThreads, clearThreads, 
clearThreads, clearToolLock, clearToolLock, clearToolLocks, configure, execute, execute, forceStop, forceStop, getContext, getCurrentLogger, getDefaultLogger, getHistory, getLastLogger, getState, getThreads, getThreads, getToolMenuItem, getToolMenuItem, getTopicName, hlog, initialize, interruptAllThreads, interruptThreads, interruptThreads, isConfigurable, isRunning, isRunning, lockLog, log, log, log, log, requiresRefresh, run, setContext, setDefaultLogger, setLogTitle, setProgress, setProgressMax, setState, setToolLogger, singleLog, singleLog, singleLog, solveContextTopicMap, solveNameForTopicMap, writeOptionsclone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, waitconfigure, execute, execute, getContext, getToolMenuItem, hlog, initialize, isConfigurable, isRunning, log, log, log, log, requiresRefresh, setContext, setToolLogger, writeOptionsforceStop, getHistory, getState, lockLog, setLogTitle, setProgress, setProgressMax, setStatepublic java.lang.String getName()
AbstractWandoraTool
getName in interface WandoraTool
getName in class AbstractMillionFirstStepsExtractor
public java.lang.String getDescription()
AbstractWandoraTool
getDescription in interface WandoraTool
getDescription in class AbstractMillionFirstStepsExtractor
public boolean _extractTopicsFrom(java.io.File f,
TopicMap tm)
throws java.lang.Exception
_extractTopicsFrom in class AbstractExtractorjava.lang.Exceptionpublic void _extractTopicsFrom(java.io.File[] f)
throws java.lang.Exception
java.lang.Exceptionpublic void handleFiles(java.io.File[] files,
TopicMap tm)
handleFiles in class AbstractExtractorpublic boolean _extractTopicsFrom(java.net.URL u,
TopicMap tm)
throws java.lang.Exception
_extractTopicsFrom in class AbstractExtractorjava.lang.Exceptionpublic boolean _extractTopicsFrom(java.lang.String str,
TopicMap tm)
throws java.lang.Exception
_extractTopicsFrom in class AbstractExtractorjava.lang.Exceptionpublic void parse(java.lang.String str,
TopicMap tm)
public void parseColumnNames(java.lang.String columns,
java.util.HashMap<java.lang.String,java.lang.Integer> columnIndexes)
public void parseLine(java.lang.String str,
java.util.HashMap<java.lang.String,java.lang.Integer> columnIndexes,
TopicMap tm)
private boolean isValid(java.lang.String data)
private java.lang.String getIndex(java.lang.String indexName,
java.lang.String[] array,
java.util.HashMap<java.lang.String,java.lang.Integer> columnIndexes)
Copyright 2004-2015 Wandora Team