% RANLP 2017 conference paper. The DOI is stored both as a bare `doi`
% (for DOI-aware styles) and as the original resolver `url`.
@InProceedings{gromann-declerck:2017:RANLP,
  author    = {Gromann, Dagmar and Declerck, Thierry},
  title     = {Hashtag Processing for Enhanced Clustering of Tweets},
  booktitle = {Proceedings of the International Conference Recent Advances in Natural Language Processing, {RANLP} 2017},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  pages     = {277--283},
  abstract  = {Rich data provided by tweets have been analyzed, clustered, and
    explored in a variety of studies. Typically those studies focus on named
    entity recognition, entity linking, and entity disambiguation or
    clustering. Tweets and hashtags are generally analyzed on sentential or
    word level but not on a compositional level of concatenated words. We
    propose an approach for a closer analysis of compounds in hashtags, and in
    the long run also of other types of text sequences in tweets, in order to
    enhance the clustering of such text documents. Hashtags have been used
    before as primary topic indicators to cluster tweets, however, their
    segmentation and its effect on clustering results have not been
    investigated to the best of our knowledge. Our results with a standard
    dataset from the Text REtrieval Conference (TREC) show that segmented and
    harmonized hashtags positively impact effective clustering.},
  doi       = {10.26615/978-954-452-049-6_038},
  url       = {https://doi.org/10.26615/978-954-452-049-6_038}
}

