Alignment API and Server 3.6

fr.inrialpes.exmo.align.impl.method
Class StringDistances

java.lang.Object
  extended by fr.inrialpes.exmo.align.impl.method.StringDistances

public class StringDistances
extends Object


Constructor Summary
StringDistances()
           
 
Method Summary
static double equalDistance(String s, String t)
           
static double hammingDistance(String s, String t)
           
static boolean isAlpha(char c)
           
static boolean isAlphaCap(char c)
           
static boolean isAlphaNum(char c)
           
static boolean isAlphaSmall(char c)
           
static boolean isNum(char c)
           
static double jaroMeasure(String s, String t)
           
static double jaroWinklerMeasure(String s, String t)
           
static double levenshteinDistance(String s, String t)
           
static double needlemanWunch2Distance(String s, String t)
           
static double needlemanWunchDistance(String s, String t, int gap)
           
static double ngramDistance(String s, String t)
           
static double smoaDistance(String s1, String s2)
           
static String stripQuotations(String s)
           
static double subStringDistance(String s1, String s2)
           
static Vector<String> tokenize(String s)
          JE//: This is independent from WordNet and should go to StringDistances. JE//: This should return a BagOfWords. The new tokenizer first looks for non-alphanumeric chars in the string; if there are any, they will be taken as the only delimiters. Otherwise the standard naming convention will be assumed: words start with a capital letter; a substring of capital letters will be seen as a whole if it is a suffix, otherwise its last letter will be taken as the start of the new token. It would be useful to parameterise this with stop words as well.
 Vector<String> tokenizeDep(String s)
          JE: I guess that here Dep means deprecated!
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

StringDistances

public StringDistances()
Method Detail

subStringDistance

public static double subStringDistance(String s1,
                                       String s2)

equalDistance

public static double equalDistance(String s,
                                   String t)

hammingDistance

public static double hammingDistance(String s,
                                     String t)

jaroMeasure

public static double jaroMeasure(String s,
                                 String t)

jaroWinklerMeasure

public static double jaroWinklerMeasure(String s,
                                        String t)

ngramDistance

public static double ngramDistance(String s,
                                   String t)

levenshteinDistance

public static double levenshteinDistance(String s,
                                         String t)

needlemanWunch2Distance

public static double needlemanWunch2Distance(String s,
                                             String t)

needlemanWunchDistance

public static double needlemanWunchDistance(String s,
                                            String t,
                                            int gap)

smoaDistance

public static double smoaDistance(String s1,
                                  String s2)

stripQuotations

public static String stripQuotations(String s)
Parameters:
s - a String
Returns:
s without the quotations it contains (those delimited by ' or ")

tokenize

public static Vector<String> tokenize(String s)
JE//: This is independent from WordNet and should go to StringDistances. JE//: This should return a BagOfWords. The new tokenizer first looks for non-alphanumeric chars in the string; if there are any, they will be taken as the only delimiters. Otherwise the standard naming convention will be assumed: words start with a capital letter; a substring of capital letters will be seen as a whole if it is a suffix, otherwise its last letter will be taken as the start of the new token. It would be useful to parameterise this with stop words as well.


tokenizeDep

public Vector<String> tokenizeDep(String s)
JE: I guess that here Dep means deprecated!

Parameters:
s - A string.
Returns:
a vector containing a collection of tokens.

isAlphaNum

public static boolean isAlphaNum(char c)

isAlpha

public static boolean isAlpha(char c)

isAlphaCap

public static boolean isAlphaCap(char c)

isAlphaSmall

public static boolean isAlphaSmall(char c)

isNum

public static boolean isNum(char c)

Alignment API and Server 3.6

(C) INRIA & friends, 2003-2008