public class JWNLDistances extends Object
Modifier and Type | Field and Description |
---|---|
static double |
ADJ_WEIGHT |
private Hashtable |
adjectives1 |
private Hashtable |
adjectives2 |
(package private) double[][] |
adjectivesMasks |
(package private) double[][] |
adjectivesResults |
protected WeakHashMap |
cache
Provides the opportunity to cache pretreatments in measures which require them.
Using this requires calling initPreCache() before using the cache and
cleanPreCache() afterwards.
|
private static net.didion.jwnl.dictionary.Dictionary |
dictionary |
private static double |
MINIMUM_DISTANCE |
static double |
NOUN_WEIGHT |
private Hashtable |
nouns1 |
private Hashtable |
nouns2 |
(package private) double[][] |
nounsMasks |
(package private) double[][] |
nounsResults |
private static Set<Object> |
stopWords |
static double |
VERB_WEIGHT |
private Hashtable |
verbs1 |
private Hashtable |
verbs2 |
(package private) double[][] |
verbsMasks |
(package private) double[][] |
verbsResults |
Constructor and Description |
---|
JWNLDistances() |
Modifier and Type | Method and Description |
---|---|
double |
basicGlossOverlap(String s1,
String s2)
Compute the overlap between all glosses of two strings
|
double |
basicSynonymDistance(String s1,
String s2)
Compute a basic distance between 2 strings using WordNet synonym.
|
double |
basicSynonymySimilarity(String s1,
String s2)
Evaluate whether two terms can be synonyms
|
private double |
bestMatch(double[][] matrix) |
void |
cleanPreCache() |
double |
compareComponentNames(String s1,
String s2) |
protected Collection<String> |
computeGlossValue(String s)
Cache method for glosses
|
double |
computeSimilarity(String s1,
String s2)
This is an elaborate similarity based on WordNet
It is assumed to assess the similarity based on a decomposition and parsing of the strings.
|
protected Set<net.didion.jwnl.data.Synset> |
computeSynsets(String s)
Cache method for synsets
|
double |
computeTokenSimilarity(net.didion.jwnl.data.IndexWord index1,
net.didion.jwnl.data.IndexWord index2) |
double |
cosynonymySimilarity(String s1,
String s2)
Compute the proportion of common synsets between two words
|
void |
display(net.didion.jwnl.data.Synset syn) |
void |
displayMatrix(double[][] matrix) |
void |
fillWithOnes(double[][] matrix) |
double |
findMatchForAdj(net.didion.jwnl.data.IndexWord index1,
net.didion.jwnl.data.IndexWord index2) |
double[][] |
getAdjectivesResults() |
(package private) Set<net.didion.jwnl.data.Synset> |
getAllSenses(String term)
Retrieve all WordNet senses of a term
|
int |
getCommonConcepts(net.didion.jwnl.data.list.PointerTargetNodeList list1,
net.didion.jwnl.data.list.PointerTargetNodeList list2) |
protected String |
getGlossForLabel(String s) |
protected String |
getGlossForLabel1(String s)
Fetches all the glosses from WordNet for the given term and concatenates them (without quotations).
|
double[][] |
getNounsResults() |
int |
getNumberOfOccurences(String token,
Hashtable nouns,
Hashtable adj,
Hashtable verbs) |
int |
getNumberOfOccurences(String token,
int n) |
double[][] |
getVerbsResults() |
void |
Initialize()
Initialize the JWNL API.
|
void |
Initialize(String wordnetdir,
String wordnetversion) |
void |
initPreCache() |
Set<?> |
loadStopWordsFromFile(String filename)
Reads a file containing one stopword per line.
Returns these stop words as a set of strings.
Sets the default stopWords to this list.
|
void |
lookUpWord(String word,
Hashtable<String,net.didion.jwnl.data.IndexWord> nouns,
Hashtable<String,net.didion.jwnl.data.IndexWord> adjectives,
Hashtable<String,net.didion.jwnl.data.IndexWord> verbs) |
protected String |
splitStringForWordNet(String s)
Retains only strings made of lowercase/uppercase characters.
Suppresses numbers.
Split strings when they contain LowercaseUppercase "/" ":" "_" "\" "+" "." "*" "&"
But not "-" or "@" taken into account by Lucene
|
protected Set<String> |
tokenizeGloss(String s)
Takes a gloss-like string (text) and returns it tokenized.
|
double |
wuPalmerSimilarity(String s1,
String s2)
Compute the Wu-Palmer similarity defined by
score = 2*depth(lcs(s1,s2)) / (depth(s1) + depth(s2))
|
public static final double NOUN_WEIGHT
public static final double ADJ_WEIGHT
public static final double VERB_WEIGHT
private static final double MINIMUM_DISTANCE
private static net.didion.jwnl.dictionary.Dictionary dictionary
double[][] nounsResults
double[][] verbsResults
double[][] adjectivesResults
double[][] nounsMasks
double[][] verbsMasks
double[][] adjectivesMasks
private Hashtable nouns1
private Hashtable adjectives1
private Hashtable verbs1
private Hashtable nouns2
private Hashtable adjectives2
private Hashtable verbs2
protected WeakHashMap cache
public JWNLDistances() throws OntoSimException
OntoSimException
public void Initialize() throws OntoSimException
OntoSimException
public void Initialize(String wordnetdir, String wordnetversion) throws OntoSimException
OntoSimException
public void initPreCache()
public void cleanPreCache()
public Set<?> loadStopWordsFromFile(String filename) throws IOException, FileNotFoundException
IOException
FileNotFoundException
public double basicSynonymDistance(String s1, String s2)
s1 -
s2 -
Set<net.didion.jwnl.data.Synset> getAllSenses(String term) throws OntoSimException
term -
OntoSimException
protected Set<net.didion.jwnl.data.Synset> computeSynsets(String s) throws OntoSimException
OntoSimException
public double cosynonymySimilarity(String s1, String s2) throws OntoSimException
s1 - a String
s2 - a String
OntoSimException
public double basicSynonymySimilarity(String s1, String s2) throws OntoSimException
s1 - a String
s2 - a String
OntoSimException
public double basicGlossOverlap(String s1, String s2) throws OntoSimException
s1 - a String
s2 - a String
OntoSimException
protected Collection<String> computeGlossValue(String s) throws OntoSimException
OntoSimException
protected Set<String> tokenizeGloss(String s) throws IOException
IOException
protected String getGlossForLabel1(String s)
protected String splitStringForWordNet(String s)
public double wuPalmerSimilarity(String s1, String s2) throws OntoSimException
s1 -
s2 -
OntoSimException
public double computeSimilarity(String s1, String s2)
public double computeTokenSimilarity(net.didion.jwnl.data.IndexWord index1, net.didion.jwnl.data.IndexWord index2)
public double findMatchForAdj(net.didion.jwnl.data.IndexWord index1, net.didion.jwnl.data.IndexWord index2)
public void lookUpWord(String word, Hashtable<String,net.didion.jwnl.data.IndexWord> nouns, Hashtable<String,net.didion.jwnl.data.IndexWord> adjectives, Hashtable<String,net.didion.jwnl.data.IndexWord> verbs)
word -
public void display(net.didion.jwnl.data.Synset syn)
public int getCommonConcepts(net.didion.jwnl.data.list.PointerTargetNodeList list1, net.didion.jwnl.data.list.PointerTargetNodeList list2)
private double bestMatch(double[][] matrix)
public int getNumberOfOccurences(String token, int n)
token - A token.
n - The number of the ontology (typically 1 or 2).
public int getNumberOfOccurences(String token, Hashtable nouns, Hashtable adj, Hashtable verbs)
public void displayMatrix(double[][] matrix)
public void fillWithOnes(double[][] matrix)
public double[][] getAdjectivesResults()
public double[][] getNounsResults()
public double[][] getVerbsResults()
(C) INRIA, UPMF & friends, 2008-2015