public class WordNet
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
static int |
ADJECTIVE |
static int |
ADJECTIVE_SATELLITE |
java.util.Hashtable<java.lang.String,java.lang.String> |
adjectiveDocumentationHash |
java.util.Hashtable<java.lang.String,java.lang.String> |
adjectiveSUMOHash |
java.util.Hashtable<java.lang.String,java.lang.String> |
adjectiveSynsetHash |
static int |
ADVERB |
java.util.Hashtable<java.lang.String,java.lang.String> |
adverbDocumentationHash |
java.util.Hashtable<java.lang.String,java.lang.String> |
adverbSUMOHash |
java.util.Hashtable<java.lang.String,java.lang.String> |
adverbSynsetHash |
java.util.Hashtable<java.lang.String,java.lang.String> |
exceptionNounHash
list of irregular plural forms where the key is the
plural, singular is the value.
|
java.util.Hashtable<java.lang.String,java.lang.String> |
exceptionVerbHash |
static boolean |
initNeeded |
java.lang.String |
maxNounSynsetID |
java.lang.String |
maxVerbSynsetID |
java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.String>> |
multiWord
A HashMap of String keys and ArrayList<String> values.
|
static int |
NOUN |
java.util.Hashtable<java.lang.String,java.lang.String> |
nounDocumentationHash |
java.util.Hashtable<java.lang.String,java.lang.String> |
nounSUMOHash |
java.util.Hashtable<java.lang.String,java.lang.String> |
nounSynsetHash |
java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.lang.String>> |
OMW
A HashMap with language name keys and HashMap<String,String> values.
|
java.util.Hashtable<java.lang.String,java.util.ArrayList<AVPair>> |
relations
Keys are POS-prefixed synsets, values are ArrayList(s) of AVPair(s)
in which the attribute is a pointer type according to
http://wordnet.princeton.edu/man/wninput.5WN.html#sect3 and
the value is a POS-prefixed synset
|
java.util.HashMap<java.lang.String,java.lang.String> |
reverseSenseIndex
A HashMap where the keys are 9 digit POS prefixed WordNet synset byte offsets,
and the values are of the form word_POS_sensenum (alpha POS like "VB").
|
java.util.HashMap<java.lang.String,java.lang.String> |
senseIndex
A HashMap where the keys are of the form word_POS_sensenum (alpha POS like "VB")
and values are 8 digit WordNet synset byte offsets.
|
java.util.ArrayList<java.lang.String> |
stopwords
English "stop words" such as "a", "at", "them", which have no or little
inherent meaning when taken alone.
|
java.util.Hashtable<java.lang.String,java.util.ArrayList<java.lang.String>> |
SUMOHash
Keys are SUMO terms, values are ArrayList(s) of
POS-prefixed synset String(s), i.e. with the part of speech
prepended to the synset number.
|
java.util.Hashtable<java.lang.String,java.util.ArrayList<java.lang.String>> |
synsetsToWords
Keys are String POS-prefixed synsets.
|
static int |
VERB |
java.util.Hashtable<java.lang.String,java.lang.String> |
verbDocumentationHash |
java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.String>> |
verbFrames
A HashMap where keys are 8 digit
WordNet synset byte offsets or synsets appended with a dash and a specific
word such as "12345678-foo".
|
java.util.Hashtable<java.lang.String,java.lang.String> |
verbSUMOHash |
java.util.Hashtable<java.lang.String,java.lang.String> |
verbSynsetHash |
static WordNet |
wn |
static java.util.HashMap<java.lang.String,WordNet> |
wns |
protected java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.lang.Integer>> |
wordCoFrequencies
a HashMap of HashMaps where the key is a word sense of the
form word_POS_num signifying the word, part of speech and number
of the sense in WordNet.
|
protected java.util.HashMap<java.lang.String,java.util.TreeSet<AVPair>> |
wordFrequencies
a HashMap where the key is a word and the value is a TreeSet of
AVPairs recording the 9-digit POS-prefixed senses of that word and the
number of times each sense occurs in the Brown corpus.
|
java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.String>> |
wordsToSenses
A HashMap with words as keys and ArrayList as values.
|
Constructor and Description |
---|
WordNet() |
Modifier and Type | Method and Description |
---|---|
java.util.HashMap<java.lang.String,java.lang.Integer> |
collectCountedWordSenses(java.lang.String sentence)
Collect all the synsets that represent the best guess at
meanings for all the words in a sentence.
|
int |
collectMultiWord(java.util.ArrayList<java.lang.String> text,
int startIndex,
java.util.ArrayList<java.lang.String> synset)
Find the synset for a multi-word string, if it exists.
|
boolean |
containsWord(java.lang.String word,
int pos)
Does WordNet contain the given word.
|
java.lang.String |
displayByKey(java.lang.String sumokbname,
java.lang.String key,
java.lang.String params) |
java.lang.String |
displaySynset(java.lang.String sumokbname,
java.lang.String synset,
java.lang.String params) |
java.lang.String |
generateNounSynsetID()
Generate a new noun synset ID that doesn't have an existing hash
|
java.lang.String |
generateVerbSynsetID()
Generate a new verb synset ID that doesn't have an existing hash
|
java.lang.String |
getDocumentation(java.lang.String synset) |
java.util.TreeMap<java.lang.String,java.util.ArrayList<java.lang.String>> |
getSensesFromWord(java.lang.String word)
Get all the synsets for a given word.
|
java.lang.String |
getSUMOMapping(java.lang.String synset)
Get the SUMO mapping for a POS-prefixed synset
|
java.lang.String |
getSUMOterm(java.lang.String word,
int pos)
Get the SUMO term for the given root form word and part of speech.
|
java.io.File |
getWnFile(java.lang.String key)
Returns the WordNet File object corresponding to key.
|
java.util.ArrayList<java.lang.String> |
getWordsFromSynset(java.lang.String synset) |
java.util.TreeMap<java.lang.String,java.lang.String> |
getWordsFromTerm(java.lang.String SUMOterm)
Get the words and synsets corresponding to a SUMO term.
|
static void |
initOnce()
Read the WordNet files only on initialization of the class.
|
boolean |
isFile(java.lang.String s) |
boolean |
isHyponym(java.lang.String synset,
java.lang.String hypo) |
boolean |
isHyponymRecurse(java.lang.String synset,
java.lang.String hypo,
java.util.ArrayList<java.lang.String> visited) |
boolean |
isStopWord(java.lang.String word)
Check whether the word is a stop word
|
static void |
main(java.lang.String[] args)
A main method, used only for testing.
|
java.lang.String |
nounRootForm(java.lang.String mixedCase,
java.lang.String input)
Return the root form of the noun, or null if it's not in the lexicon.
|
java.lang.String |
nounSynsetFromTermFormat(java.lang.String tf,
java.lang.String SUMOterm,
KB kb)
Generate a new noun synset from a termFormat
|
java.lang.String |
page(java.lang.String inp,
int pos,
java.lang.String kbname,
java.lang.String synset,
java.lang.String params)
This is the regular point of entry for this class.
|
java.lang.String |
processPrologString(java.lang.String doc)
Double any single quotes that appear.
|
void |
readSenseCount()
Read word sense frequencies into a HashMap of PriorityQueues
containing AVPairs where the value is a word and the attribute
(on which PriorityQueue is sorted) is an 8 digit String
representation of an integer count.
|
void |
readSenseIndex()
Note that WordNet forces all these words to lowercase in the index.xxx files
|
void |
readStopWords() |
void |
readWordCoFrequencies()
Read in a HashMap of HashMaps where the key is a word sense of the
form word_POS_num signifying the word, part of speech and number
of the sense in WordNet.
|
java.lang.String |
removeStopWords(java.lang.String sentence)
Remove stop words from a sentence.
|
java.lang.String |
senseKeyPOS(java.lang.String senseKey) |
static java.util.ArrayList<java.lang.String> |
splitToArrayList(java.lang.String st)
Return an ArrayList of the string split by spaces.
|
static java.util.ArrayList<java.lang.String> |
splitToArrayListSentence(java.lang.String st)
Return an ArrayList of the string split by periods.
|
java.lang.String |
sumoFileDisplay(java.lang.String pathname,
java.lang.String counter,
java.lang.String params)
A routine which takes a full pathname as input and returns a sentence by sentence display of sense
and sentiment analysis
|
java.lang.String |
sumoSentenceDisplay(java.lang.String input,
java.lang.String context,
java.lang.String params)
A routine which looks up a given list of words in the hashtables
to find the relevant word definitions and SUMO mappings.
|
java.lang.String |
sumoSentimentDisplay(java.lang.String sentence)
A routine that uses computeSentiment in DB.java to display a sentiment score for a single sentence
as well as the individual scores of scored descriptors.
|
void |
synsetFromTermFormat(java.lang.String tf,
java.lang.String SUMOterm,
KB kb)
Generate a new synset from a termFormat
|
void |
termFormatsToSynsets(KB kb)
Generate new synsets from termFormats
|
static void |
testProcessPointers()
A method used only for testing.
|
static void |
testWordFreq()
A method used only for testing.
|
java.lang.String |
verbRootForm(java.lang.String mixedCase,
java.lang.String input)
Return the present tense singular form of the verb, or null if
it's not in the lexicon.
|
java.lang.String |
verbSynsetFromTermFormat(java.lang.String tf,
java.lang.String SUMOterm,
KB kb)
Generate a new verb synset from a termFormat
|
void |
writeProlog(KB kb) |
void |
writeWordNetG() |
void |
writeWordNetHyp() |
void |
writeWordNetProlog() |
void |
writeWordNetS()
Write WordNet data to a prolog file with a single kind of clause
in the following format:
s(Synset_ID, Word_No_in_the_Synset, Word, SS_Type,
Synset_Rank_By_the_Word,Tag_Count)
|
void |
writeXML() |
public static WordNet wn
public static java.util.HashMap<java.lang.String,WordNet> wns
public static boolean initNeeded
public java.util.Hashtable<java.lang.String,java.lang.String> nounSynsetHash
public java.util.Hashtable<java.lang.String,java.lang.String> verbSynsetHash
public java.util.Hashtable<java.lang.String,java.lang.String> adjectiveSynsetHash
public java.util.Hashtable<java.lang.String,java.lang.String> adverbSynsetHash
public java.util.Hashtable<java.lang.String,java.lang.String> verbDocumentationHash
public java.util.Hashtable<java.lang.String,java.lang.String> adjectiveDocumentationHash
public java.util.Hashtable<java.lang.String,java.lang.String> adverbDocumentationHash
public java.util.Hashtable<java.lang.String,java.lang.String> nounDocumentationHash
public java.util.Hashtable<java.lang.String,java.lang.String> nounSUMOHash
public java.util.Hashtable<java.lang.String,java.lang.String> verbSUMOHash
public java.util.Hashtable<java.lang.String,java.lang.String> adjectiveSUMOHash
public java.util.Hashtable<java.lang.String,java.lang.String> adverbSUMOHash
public java.lang.String maxNounSynsetID
public java.lang.String maxVerbSynsetID
public java.util.Hashtable<java.lang.String,java.util.ArrayList<java.lang.String>> SUMOHash
public java.util.Hashtable<java.lang.String,java.util.ArrayList<java.lang.String>> synsetsToWords
public java.util.Hashtable<java.lang.String,java.lang.String> exceptionNounHash
public java.util.Hashtable<java.lang.String,java.lang.String> exceptionVerbHash
public java.util.Hashtable<java.lang.String,java.util.ArrayList<AVPair>> relations
protected java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.lang.Integer>> wordCoFrequencies
protected java.util.HashMap<java.lang.String,java.util.TreeSet<AVPair>> wordFrequencies
public java.util.ArrayList<java.lang.String> stopwords
public java.util.HashMap<java.lang.String,java.lang.String> senseIndex
public java.util.HashMap<java.lang.String,java.lang.String> reverseSenseIndex
public java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.String>> verbFrames
public java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.String>> wordsToSenses
public java.util.HashMap<java.lang.String,java.util.ArrayList<java.lang.String>> multiWord
public static final int NOUN
public static final int VERB
public static final int ADJECTIVE
public static final int ADVERB
public static final int ADJECTIVE_SATELLITE
public java.util.HashMap<java.lang.String,java.util.HashMap<java.lang.String,java.lang.String>> OMW
public java.io.File getWnFile(java.lang.String key)
key
- A descriptive literal String that maps to a regular
expression pattern used to obtain a WordNet file.
public static java.util.ArrayList<java.lang.String> splitToArrayList(java.lang.String st)
public static java.util.ArrayList<java.lang.String> splitToArrayListSentence(java.lang.String st)
public java.lang.String getSUMOMapping(java.lang.String synset)
public void readWordCoFrequencies()
public void readStopWords()
public void readSenseIndex()
public void readSenseCount()
public java.lang.String sumoSentenceDisplay(java.lang.String input, java.lang.String context, java.lang.String params)
input
- is the target sentence to be parsed. See WordSenseBody.jsp for usage.
context
- is the larger context of the sentence. Can mean more accurate results.
params
- is the set of html parameters
public java.lang.String sumoSentimentDisplay(java.lang.String sentence)
sentence
- is the target sentence to be scored. See WordSenseBody.jsp for usage.
public java.lang.String sumoFileDisplay(java.lang.String pathname, java.lang.String counter, java.lang.String params)
pathname
- is the full pathname of the file to be parsed sentence by sentence. See WordSenseBody.jsp for usage.
counter
- is used to keep track of which sentence is being displayed
params
- is the set of html parameters
public boolean isFile(java.lang.String s)
public boolean isHyponymRecurse(java.lang.String synset, java.lang.String hypo, java.util.ArrayList<java.lang.String> visited)
public boolean isHyponym(java.lang.String synset, java.lang.String hypo)
public java.lang.String removeStopWords(java.lang.String sentence)
public boolean isStopWord(java.lang.String word)
public int collectMultiWord(java.util.ArrayList<java.lang.String> text, int startIndex, java.util.ArrayList<java.lang.String> synset)
text
- is an array of String words.
startIndex
- is the first word in the array to look at
synset
- is an array of only one element, if a synset is found
and empty otherwise
public java.util.HashMap<java.lang.String,java.lang.Integer> collectCountedWordSenses(java.lang.String sentence)
public static void initOnce()
public java.lang.String nounRootForm(java.lang.String mixedCase, java.lang.String input)
public java.lang.String verbRootForm(java.lang.String mixedCase, java.lang.String input)
public java.util.TreeMap<java.lang.String,java.util.ArrayList<java.lang.String>> getSensesFromWord(java.lang.String word)
public java.util.TreeMap<java.lang.String,java.lang.String> getWordsFromTerm(java.lang.String SUMOterm)
public java.util.ArrayList<java.lang.String> getWordsFromSynset(java.lang.String synset)
public java.lang.String getSUMOterm(java.lang.String word, int pos)
public boolean containsWord(java.lang.String word, int pos)
public java.lang.String page(java.lang.String inp, int pos, java.lang.String kbname, java.lang.String synset, java.lang.String params)
inp
- The string the user is searching for.
pos
- The part of speech of the word 1=noun, 2=verb, 3=adjective, 4=adverb
public java.lang.String getDocumentation(java.lang.String synset)
synset
- is a synset with POS-prefix
public java.lang.String displaySynset(java.lang.String sumokbname, java.lang.String synset, java.lang.String params)
synset
- is a synset with POS-prefix
public java.lang.String displayByKey(java.lang.String sumokbname, java.lang.String key, java.lang.String params)
key
- is a WordNet sense key
public void writeXML()
public void writeProlog(KB kb)
public java.lang.String senseKeyPOS(java.lang.String senseKey)
public void writeWordNetS()
public void writeWordNetHyp()
public java.lang.String processPrologString(java.lang.String doc)
public void writeWordNetG()
public void writeWordNetProlog() throws java.io.IOException
java.io.IOException
public java.lang.String generateNounSynsetID()
public java.lang.String generateVerbSynsetID()
public java.lang.String nounSynsetFromTermFormat(java.lang.String tf, java.lang.String SUMOterm, KB kb)
public java.lang.String verbSynsetFromTermFormat(java.lang.String tf, java.lang.String SUMOterm, KB kb)
public void synsetFromTermFormat(java.lang.String tf, java.lang.String SUMOterm, KB kb)
public void termFormatsToSynsets(KB kb)
public static void testWordFreq()
public static void testProcessPointers()
public static void main(java.lang.String[] args)