public class MarcXMLExtractor extends AbstractExtractor
| Modifier and Type | Class and Description |
|---|---|
private class |
MarcXMLExtractor.MarcXMLParser |
| Modifier and Type | Field and Description |
|---|---|
private java.lang.String[] |
authorCodes |
static java.lang.String |
BASENAME_PATTERN |
private java.util.ArrayList<java.lang.String> |
basenamePatterns |
private java.lang.String[] |
contentTypes |
static boolean |
CONVERT_LEADERS |
protected static java.lang.String |
DATA_SI |
private static java.lang.String |
defaultEncoding |
private static java.lang.String |
defaultLang |
static java.lang.String |
EXCLUDE_FIELDS |
static java.lang.String |
EXCLUDE_SUBFIELDS |
private java.util.HashMap |
excludeFields |
private java.util.HashMap |
excludeSubfields |
protected static java.lang.String |
FIELD_SI |
protected static java.lang.String |
FIELD_SI_TEMPLATE |
static java.lang.String |
INCLUDE_FIELDS |
static boolean |
INCLUDE_INDX_IN_ASSOCIATIONS |
static java.lang.String |
INCLUDE_SUBFIELDS |
private java.util.HashMap |
includeFields |
private java.util.HashMap |
includeSubfields |
protected static java.lang.String |
IND_SI |
protected static java.lang.String |
LEADER_SI |
protected static java.lang.String |
MARC_SI |
protected static java.lang.String |
RECORD_SI |
static java.lang.String |
RECORD_SI_PATTERN |
private java.util.ArrayList<java.lang.String> |
recordSIPatterns |
static boolean |
SOLVE_FIELD_NAMES |
static boolean |
SOLVE_SUBFIELD_NAMES |
protected static java.lang.String |
SUBFIELDCODE_SI |
private java.lang.String[] |
titleCodes |
static boolean |
TRIM_DATAS |
CUSTOM_EXTRACTOR, DONE_FAILED, DONE_MANY, DONE_ONE, EXACTLY_GIVEN_URLS, FILE_EXTRACTOR, FILE_PATTERN, GIVEN_URLS_AND_ALL_CRAWLED_DOCUMENTS, GIVEN_URLS_AND_CRAWLED_DOCUMENTS_IN_URL_DOMAIN, GIVEN_URLS_AND_LINKED_DOCUMENTS, GIVEN_URLS_AND_URL_BELOW, INFO_WAIT_WHILE_WORKING, LOG_TITLE, POINT_START_URL_TEXT, RAW_EXTRACTOR, SELECT_DIALOG_TITLE, STRING_EXTRACTOR_NOT_SUPPORTED_MESSAGE, URL_EXTRACTOR
CLOSE, EXECUTE, INVISIBLE, VISIBLE, WAIT
RETURN_ERROR, RETURN_INFO
| Constructor and Description |
|---|
MarcXMLExtractor()
Creates a new instance of MarcXMLExtractor
|
| Modifier and Type | Method and Description |
|---|---|
boolean |
_extractTopicsFrom(java.io.File file,
TopicMap topicMap) |
boolean |
_extractTopicsFrom(org.xml.sax.InputSource in,
TopicMap topicMap) |
boolean |
_extractTopicsFrom(java.io.InputStream in,
TopicMap topicMap) |
boolean |
_extractTopicsFrom(java.lang.String str,
TopicMap topicMap) |
boolean |
_extractTopicsFrom(java.net.URL url,
TopicMap topicMap) |
void |
configure(Wandora admin,
Options options,
java.lang.String prefix)
If the tool is configurable, shows a user interface to configure the tool.
|
java.lang.String[] |
getContentTypes()
Returns an array of String containing the content-types this
ContentHandler can process. |
Topic |
getDataType(TopicMap tm) |
java.lang.String |
getDescription()
AdminToolManager displays tool descriptions while the user browses available
tools and builds user-customizable GUI elements such as the Tools menu.
|
Topic |
getFieldTopic(java.lang.String field,
TopicMap tm) |
Topic |
getFieldType(TopicMap tm) |
javax.swing.Icon |
getIcon()
All tools may have identifying graphic icon used within tool GUI elements.
|
Topic |
getInd1Topic(java.lang.String ind1,
java.lang.String tag,
TopicMap tm) |
Topic |
getInd1Type(java.lang.String tag,
TopicMap tm) |
Topic |
getInd2Topic(java.lang.String ind2,
java.lang.String tag,
TopicMap tm) |
Topic |
getInd2Type(java.lang.String tag,
TopicMap tm) |
java.lang.String |
getIndicatorName(java.lang.String field,
java.lang.String indicatorId,
java.lang.String value) |
java.lang.String |
getIndicatorValueName(java.lang.String field,
java.lang.String indicatorId,
java.lang.String value) |
Topic |
getIndType(TopicMap tm) |
Topic |
getLeaderType(TopicMap tm) |
Topic |
getMARCClass(TopicMap tm) |
java.lang.String |
getName()
The tool's name represents the tool in the UI unless the tool has
explicitly been given another GUI name.
|
protected Topic |
getOrCreateTopic(TopicMap tm,
java.lang.String si) |
protected Topic |
getOrCreateTopic(TopicMap tm,
java.lang.String si,
java.lang.String bn) |
protected Topic |
getOrCreateTopic(TopicMap tm,
java.lang.String si,
java.lang.String bn,
Topic type) |
Topic |
getRecordType(TopicMap tm) |
Topic |
getSubFieldCodeTopic(java.lang.String subfied,
int counter,
java.lang.String field,
java.lang.String ind1Modifier,
java.lang.String ind2Modifier,
TopicMap tm) |
Topic |
getSubFieldCodeTopic(java.lang.String subfied,
java.lang.String field,
java.lang.String ind1Modifier,
java.lang.String ind2Modifier,
TopicMap tm) |
Topic |
getSubFieldCodeType(TopicMap tm) |
Topic |
getSubFieldDataTopic(java.lang.String data,
java.lang.String tagModifier,
java.lang.String ind1Modifier,
java.lang.String ind2Modifier,
TopicMap tm) |
Topic |
getTopic(TopicMap tm,
java.lang.String str,
java.lang.String SIBase,
Topic type) |
Topic |
getWandoraClass(TopicMap tm) |
boolean |
isConfigurable()
Whether this tool is configurable.
|
protected java.lang.String |
makeFieldSI(java.lang.String field) |
protected java.lang.String |
makeSI(java.lang.String base,
java.lang.String endPoint) |
protected void |
makeSubclassOf(TopicMap tm,
Topic t,
Topic superclass) |
void |
parseBasenamePatterns(java.lang.String patterns) |
private java.util.HashMap |
parseFieldCodes(java.lang.String str) |
void |
parseSIPatterns(java.lang.String patterns) |
java.util.HashMap |
parseSubfieldCodes(java.lang.String str) |
void |
processControlField(java.lang.String field,
java.lang.String data,
Topic record,
Topic type,
TopicMap tm) |
protected void |
topicalize(java.lang.String leader,
java.util.ArrayList<MarcField> datafields,
java.util.HashMap<java.lang.String,java.lang.String> controlfields,
java.util.ArrayList<java.lang.String> subjectIdentifiers,
java.util.ArrayList<java.lang.String> basenames,
TopicMap tm) |
boolean |
useURLCrawler() |
acceptBrowserExtractRequest, addCrawlerUrl, browserExtractorConsumesPlainText, buildSI, buildSL, clearMasterSubject, createAssociation, createAssociation, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, createTopic, croppedFilename, croppedFilename, croppedUrlString, croppedUrlString, doBrowserExtract, dropExtract, dropExtract, dropExtract, execute, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFrom, extractTopicsFromText, getBrowserExtractorName, getCrawlerMode, getExtractorType, getForceContent, getForceFiles, getForceUrls, getGUIText, getGUIText, getInterruptsHandled, getMasterSubject, getType, getWandora, handle, handleContent, handleCustomType, handleFiles, handleForcedContent, handleInterrupt, handleStringContent, handleUrls, initializeCustomType, instantDropHandle, makeSubclassOfWandoraClass, runInOwnThread, setData, setDisplayName, setForceContent, setForceFiles, setForceUrls, setMasterSubject, setMasterSubject, setTopicMap, setupCrawler, setWandora, takeNap, urlEncode, useTempTopicMap
addUndoMarker, addUndoMarker, allowMultipleInvocations, clearAllThreads, clearThreads, clearThreads, clearToolLock, clearToolLock, clearToolLocks, execute, execute, forceStop, forceStop, getContext, getCurrentLogger, getDefaultLogger, getHistory, getLastLogger, getState, getThreads, getThreads, getToolMenuItem, getToolMenuItem, getTopicName, hlog, initialize, interruptAllThreads, interruptThreads, interruptThreads, isRunning, isRunning, lockLog, log, log, log, log, requiresRefresh, run, setContext, setDefaultLogger, setLogTitle, setProgress, setProgressMax, setState, setToolLogger, singleLog, singleLog, singleLog, solveContextTopicMap, solveNameForTopicMap, writeOptions
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
execute, execute, getContext, getToolMenuItem, hlog, initialize, isRunning, log, log, log, log, requiresRefresh, setContext, setToolLogger, writeOptions
forceStop, getHistory, getState, lockLog, setLogTitle, setProgress, setProgressMax, setState
public static boolean TRIM_DATAS
public static boolean INCLUDE_INDX_IN_ASSOCIATIONS
public static boolean SOLVE_FIELD_NAMES
public static boolean SOLVE_SUBFIELD_NAMES
public static boolean CONVERT_LEADERS
public static java.lang.String EXCLUDE_FIELDS
public static java.lang.String INCLUDE_FIELDS
public static java.lang.String EXCLUDE_SUBFIELDS
public static java.lang.String INCLUDE_SUBFIELDS
public static java.lang.String RECORD_SI_PATTERN
public static java.lang.String BASENAME_PATTERN
protected static java.lang.String MARC_SI
protected static java.lang.String IND_SI
protected static java.lang.String SUBFIELDCODE_SI
protected static java.lang.String LEADER_SI
protected static java.lang.String FIELD_SI
protected static java.lang.String FIELD_SI_TEMPLATE
protected static java.lang.String DATA_SI
protected static java.lang.String RECORD_SI
private static java.lang.String defaultEncoding
private static java.lang.String defaultLang
private java.util.HashMap excludeFields
private java.util.HashMap includeFields
private java.util.HashMap excludeSubfields
private java.util.HashMap includeSubfields
private java.util.ArrayList<java.lang.String> recordSIPatterns
private java.util.ArrayList<java.lang.String> basenamePatterns
private final java.lang.String[] contentTypes
private java.lang.String[] titleCodes
private java.lang.String[] authorCodes
public MarcXMLExtractor()
public java.lang.String getName()
AbstractWandoraTool
getName in interface WandoraTool
getName in class AbstractExtractor
public java.lang.String getDescription()
AbstractWandoraTool
getDescription in interface WandoraTool
getDescription in class AbstractExtractor
public javax.swing.Icon getIcon()
AbstractWandoraTool
getIcon should return the Icon object of the tool.
getIcon in interface WandoraTool
getIcon in class AbstractExtractor
public java.lang.String[] getContentTypes()
Handler
Returns an array of String containing the content-types this
ContentHandler can process.
getContentTypes in interface Handler
getContentTypes in class AbstractExtractor
public boolean useURLCrawler()
useURLCrawler in class AbstractExtractor
public boolean isConfigurable()
AbstractWandoraTool
isConfigurable in interface WandoraTool
isConfigurable in class AbstractWandoraTool
public void configure(Wandora admin, Options options, java.lang.String prefix) throws TopicMapException
AbstractWandoraTool
configure in interface WandoraTool
configure in class AbstractWandoraTool
TopicMapException
private java.util.HashMap parseFieldCodes(java.lang.String str)
public java.util.HashMap parseSubfieldCodes(java.lang.String str)
public void parseSIPatterns(java.lang.String patterns)
public void parseBasenamePatterns(java.lang.String patterns)
public boolean _extractTopicsFrom(java.net.URL url,
TopicMap topicMap)
throws java.lang.Exception
_extractTopicsFrom in class AbstractExtractor
java.lang.Exception
public boolean _extractTopicsFrom(java.io.File file,
TopicMap topicMap)
throws java.lang.Exception
_extractTopicsFrom in class AbstractExtractor
java.lang.Exception
public boolean _extractTopicsFrom(java.lang.String str,
TopicMap topicMap)
throws java.lang.Exception
_extractTopicsFrom in class AbstractExtractor
java.lang.Exception
public boolean _extractTopicsFrom(java.io.InputStream in,
TopicMap topicMap)
throws java.lang.Exception
java.lang.Exception
public boolean _extractTopicsFrom(org.xml.sax.InputSource in,
TopicMap topicMap)
throws java.lang.Exception
java.lang.Exception
protected void topicalize(java.lang.String leader,
java.util.ArrayList<MarcField> datafields,
java.util.HashMap<java.lang.String,java.lang.String> controlfields,
java.util.ArrayList<java.lang.String> subjectIdentifiers,
java.util.ArrayList<java.lang.String> basenames,
TopicMap tm)
public void processControlField(java.lang.String field,
java.lang.String data,
Topic record,
Topic type,
TopicMap tm)
throws TopicMapException
TopicMapException
public Topic getLeaderType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getFieldTopic(java.lang.String field, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getFieldType(TopicMap tm) throws TopicMapException
TopicMapException
protected java.lang.String makeFieldSI(java.lang.String field)
public Topic getSubFieldCodeType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getSubFieldCodeTopic(java.lang.String subfied, int counter, java.lang.String field, java.lang.String ind1Modifier, java.lang.String ind2Modifier, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getSubFieldCodeTopic(java.lang.String subfied, java.lang.String field, java.lang.String ind1Modifier, java.lang.String ind2Modifier, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getSubFieldDataTopic(java.lang.String data, java.lang.String tagModifier, java.lang.String ind1Modifier, java.lang.String ind2Modifier, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getDataType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getInd1Topic(java.lang.String ind1, java.lang.String tag, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getInd1Type(java.lang.String tag, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getIndType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getInd2Topic(java.lang.String ind2, java.lang.String tag, TopicMap tm) throws TopicMapException
TopicMapException
public Topic getInd2Type(java.lang.String tag, TopicMap tm) throws TopicMapException
TopicMapException
public java.lang.String getIndicatorName(java.lang.String field,
java.lang.String indicatorId,
java.lang.String value)
public java.lang.String getIndicatorValueName(java.lang.String field,
java.lang.String indicatorId,
java.lang.String value)
public Topic getRecordType(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getMARCClass(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getWandoraClass(TopicMap tm) throws TopicMapException
TopicMapException
public Topic getTopic(TopicMap tm, java.lang.String str, java.lang.String SIBase, Topic type) throws TopicMapException
TopicMapException
protected Topic getOrCreateTopic(TopicMap tm, java.lang.String si) throws TopicMapException
TopicMapException
protected Topic getOrCreateTopic(TopicMap tm, java.lang.String si, java.lang.String bn) throws TopicMapException
TopicMapException
protected Topic getOrCreateTopic(TopicMap tm, java.lang.String si, java.lang.String bn, Topic type) throws TopicMapException
TopicMapException
protected void makeSubclassOf(TopicMap tm, Topic t, Topic superclass) throws TopicMapException
TopicMapException
protected java.lang.String makeSI(java.lang.String base,
java.lang.String endPoint)
Copyright 2004-2015 Wandora Team