WASET
	@comment{Citation key normalised to morariu2011suffix: the exported key
	  contained spaces, parentheses and a URL, which BibTeX cannot parse.
	  The source PDF URL is preserved in the "ee" field of the entry below.}
	@article{morariu2011suffix,
	  author    = {Morariu, Daniel I. and Cretulescu, Radu G. and Vintan, Lucian N.},
	  title     = {Using Suffix Tree Document Representation in Hierarchical Agglomerative Clustering},
	  journal   = {International Journal of Computer and Information Engineering},
	  volume    = {5},
	  number    = {11},
	  pages     = {1160--1165},
	  year      = {2011},
	  publisher = {World Academy of Science, Engineering and Technology},
	  issn      = {1307-6892},
	  url       = {https://publications.waset.org/vol/59},
	  ee        = {https://publications.waset.org/pdf/7768},
	  bibsource = {https://publications.waset.org/},
	  index     = {Open Science Index 59, 2011},
	  abstract  = {In text categorization problem the most used method
for documents representation is based on words frequency vectors
called VSM (Vector Space Model). This representation is based only
on words from documents and in this case loses any ``word context''
information found in the document. In this article we make a
comparison between the classical method of document representation
and a method called Suffix Tree Document Model (STDM) that is
based on representing documents in the Suffix Tree format. For the
STDM model we proposed a new approach for documents
representation and a new formula for computing the similarity
between two documents. Thus we propose to build the suffix tree
only for any two documents at a time. This approach is faster, it has
lower memory consumption and use entire document representation
without using methods for disposing nodes. Also for this method is
proposed a formula for computing the similarity between documents,
which improves substantially the clustering quality. This
representation method was validated using HAC - Hierarchical
Agglomerative Clustering. In this context we experiment also the
stemming influence in the document preprocessing step and highlight
the difference between similarity or dissimilarity measures to find
``closer'' documents.},
	}