public class DocumentCollection extends HashSet<Document> implements Observer
Modifier and Type | Class and Description |
---|---|
static class |
DocumentCollection.WEIGHT |
Modifier and Type | Field and Description |
---|---|
private static long |
serialVersionUID |
private TreeMap<String,Set<Document>> |
terms |
Constructor and Description |
---|
DocumentCollection() |
Modifier and Type | Method and Description |
---|---|
boolean |
add(Document o) |
private void |
addTermOcc(Document doc,
String term) |
void |
clear() |
String[] |
getDimensions() |
double[] |
getDocVector(Document doc,
DocumentCollection.WEIGHT vectorType) |
Set<String> |
getTerms()
Returns the set of terms.
Be careful: this method returns a reference to the set of terms, not a copy!
|
double[] |
getTFDocVector(Document doc) |
double[] |
getTFIDFDocVector(Document doc) |
private void |
indexTermsOf(Document doc) |
boolean |
remove(Document o) |
void |
update(Observable o,
Object arg) |
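Methods inherited from class java.util.HashSet: clone, contains, isEmpty, iterator, remove, size, spliterator
Methods inherited from class java.util.AbstractSet: equals, hashCode, removeAll
Methods inherited from class java.util.AbstractCollection: addAll, containsAll, retainAll, toArray, toArray, toString
Methods inherited from class java.lang.Object: finalize, getClass, notify, notifyAll, wait, wait, wait
Methods inherited from interface java.util.Set: addAll, containsAll, equals, hashCode, removeAll, retainAll, toArray, toArray
Methods inherited from interface java.util.Collection: parallelStream, removeIf, stream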
private static final long serialVersionUID
private void indexTermsOf(Document doc)
public Set<String> getTerms()
public String[] getDimensions()
public double[] getTFIDFDocVector(Document doc)
public double[] getDocVector(Document doc, DocumentCollection.WEIGHT vectorType)
public double[] getTFDocVector(Document doc)
public boolean add(Document o)
public void clear()
public boolean remove(Document o)
public void update(Observable o, Object arg)
(C) INRIA, UPMF & friends, 2008-2015