public class FeatureMakerMaryServer extends Object
Modifier and Type | Field and Description |
---|---|
protected static FeatureDefinition |
featDef |
protected static MaryHttpClient |
mary |
protected static String |
maryHost |
protected static String |
maryPort |
protected static int |
numSentences |
protected static int |
numUnreliableSentences |
protected static Vector<String> |
selectionFeature |
protected static int[] |
selectionFeatureIndex |
protected static boolean |
strangeSymbols |
protected static boolean |
strictReliability |
protected static boolean |
unknownWords |
protected static boolean |
usefulSentence |
protected static DBHandler |
wikiToDB |
Constructor and Description |
---|
FeatureMakerMaryServer() |
Modifier and Type | Method and Description |
---|---|
protected static int |
checkReliability(Element t)
Phonemise the given document with the help of JPhonemiser
g2p_method "contains-unknown-words" or "contains-strange-symbols".
|
protected static StringBuffer |
collectTokens(Node nextToken,
StringBuffer sentence)
Collect the tokens of a sentence
|
protected static byte[] |
getFeatures(MaryData d)
Process the target features and print them to the given file
|
protected static void |
getXMLAsString(Node motherNode,
StringBuilder ppText)
Convert the given xml-node and its subnodes to Strings and collect them in the given StringBuilder
|
static void |
main(String[] args) |
protected static Document |
phonemiseText(String textString,
int id)
Process the given text with the MaryClient from Text to Chunked
|
protected static void |
printUsage()
Print usage of this program
|
protected static MaryData |
processSentence(String nextSentence,
int textId,
String feas)
Process one sentence from text to target features
|
protected static boolean |
readArgs(String[] args)
Read and parse the command line args
|
protected static Vector<String> |
splitIntoSentences(String text,
int id,
boolean test)
Split the text into separate sentences
|
protected static MaryHttpClient mary
protected static boolean usefulSentence
protected static boolean unknownWords
protected static boolean strangeSymbols
protected static FeatureDefinition featDef
protected static int[] selectionFeatureIndex
protected static String maryHost
protected static String maryPort
protected static boolean strictReliability
protected static int numSentences
protected static int numUnreliableSentences
protected static DBHandler wikiToDB
protected static void printUsage()
protected static boolean readArgs(String[] args)
args
- the args
protected static MaryData processSentence(String nextSentence, int textId, String feas)
nextSentence
- the sentence
textId
- the file containing the sentence
feas
- target feature names separated by spaces (e.g. "phone next_phone selection_prosody")
protected static Document phonemiseText(String textString, int id) throws Exception
textString
- the text to process
id
- id
Exception
- Exception
protected static byte[] getFeatures(MaryData d) throws Exception
d
- the target features as a MaryData object
Exception
- Exception
protected static Vector<String> splitIntoSentences(String text, int id, boolean test) throws Exception
text
- the file
id
- id
test
- test
Exception
- Exception
protected static StringBuffer collectTokens(Node nextToken, StringBuffer sentence)
nextToken
- the Node to start from. checkCredibility returns 0 if the sentence is useful, 1 if the sentence contains
unknownWords (so the sentence is not useful), and 2 if the sentence contains strangeSymbols (so the sentence is not
useful).
sentence
- sentence
protected static int checkReliability(Element t)
t
- t
protected static void getXMLAsString(Node motherNode, StringBuilder ppText)
motherNode
- the xml-node
ppText
- the StringBuilder
Copyright © 2000–2016 DFKI GmbH. All rights reserved.