WASET
	@comment{Journal article record exported from publications.waset.org.
	  The exported citation key contained spaces and parentheses, which BibTeX
	  cannot parse, so it was replaced by galambos2008compression.
	  The fields ee, bibsource and index are non-standard and are ignored by
	  standard styles; they are kept as provenance of the export.}
	@article{galambos2008compression,
	  author    = {Galambos, Leo and Lansky, Jan and Chernik, Katsiaryna},
	  title     = {Compression of Semistructured Documents},
	  journal   = {International Journal of Computer and Information Engineering},
	  publisher = {World Academy of Science, Engineering and Technology},
	  volume    = {2},
	  number    = {8},
	  pages     = {2851--2856},
	  year      = {2008},
	  issn      = {1307-6892},
	  url       = {https://publications.waset.org/vol/20},
	  ee        = {https://publications.waset.org/pdf/745},
	  bibsource = {https://publications.waset.org/},
	  index     = {Open Science Index 20, 2008},
	  abstract  = {EGOTHOR is a search engine that indexes the Web
and allows us to search the Web documents. Its hit list contains URL
and title of the hits, and also some snippet which tries to shortly
show a match. The snippet can be almost always assembled by an
algorithm that has a full knowledge of the original document (mostly
HTML page). It implies that the search engine is required to store
the full text of the documents as a part of the index.
Such a requirement leads us to pick up an appropriate compression
algorithm which would reduce the space demand. One of the solutions
could be to use common compression methods, for instance gzip or
bzip2, but it might be preferable if we develop a new method which
would take advantage of the document structure, or rather, the textual
character of the documents.
There already exist a special compression text algorithms and
methods for a compression of XML documents. The aim of this
paper is an integration of the two approaches to achieve an optimal
level of the compression ratio.},
	}