% RANLP 2017 conference paper (Tsekouras, Varlamis, Giannakopoulos).
% month uses the standard BibTeX macro so the bibliography style controls its rendering.
% The DOI is stored bare in doi; the resolver link is kept in url for styles that only read url.
@InProceedings{tsekouras-varlamis-giannakopoulos:2017:RANLP,
  author    = {Tsekouras, Leonidas  and  Varlamis, Iraklis  and  Giannakopoulos, George},
  title     = {A Graph-based Text Similarity Measure That Employs Named Entity Information},
  booktitle = {Proceedings of the International Conference Recent Advances in Natural Language Processing, {RANLP} 2017},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  pages     = {765--771},
  abstract  = {Text comparison is an interesting though hard task, with many applications in
	Natural Language Processing. This work introduces a new text-similarity
	measure, which employs named-entities' information extracted from the texts and
	the n-gram graphs' model for representing documents. Using OpenCalais as a
	named-entity recognition service and the JINSECT toolkit for constructing and
	managing n-gram graphs, the text similarity measure is embedded in a text
	clustering algorithm (k-Means). The evaluation of the produced clusters with
	various clustering validity metrics shows that the extraction of named entities
	at a first step can be profitable for the time-performance of similarity
	measures that are based on the n-gram graph representation without affecting
	the overall performance of the NLP task.},
  doi       = {10.26615/978-954-452-049-6_098},
  url       = {https://doi.org/10.26615/978-954-452-049-6_098}
}

