% EACL 2017 Student Research Workshop paper (ACL Anthology id E17-4009).
% Accents use the brace-wrapped "special character" form so classic BibTeX
% sorts and labels the names correctly; proper nouns in the title are braced
% so sentence-casing styles keep their capitals.
@InProceedings{perezmelian-conejero-ferriramirez:2017:EACLSRW17,
  author    = {P{\'e}rez-Meli{\'a}n, Jos{\'e} Alberto and Conejero, J. Alberto and Ferri Ram{\'\i}rez, Cesar},
  title     = {{Zipf's} and {Benford's} laws in {Twitter} hashtags},
  booktitle = {Proceedings of the Student Research Workshop at the 15th Conference of the European Chapter of the Association for Computational Linguistics},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {84--93},
  abstract  = {Social networks have transformed communication dramatically in recent years
	through the rise of new platforms and the development of a new language of
	communication. This landscape requires new forms to describe and predict the
	behaviour of users in networks.
	This paper presents an analysis of the frequency distribution of hashtag
	popularity in Twitter conversations. Our objective is to determine if these
	frequency distribution follow some well-known frequency distribution that many
	real-life sets of numerical data satisfy.
	In particular, we study the similarity of frequency distribution of hashtag
	popularity with respect to Zipf's law, an empirical law referring to the
	phenomenon that many types of data in social sciences can be approximated with
	a Zipfian distribution.
	Additionally, we also analyse  Benford's law, is a special case of Zipf's
	law, a common pattern about the frequency distribution of leading digits. In
	order to compute correctly the frequency distribution of hashtag popularity, we
	need to correct many spelling errors that Twitter's users introduce. For this
	purpose we introduce a new filter to correct hashtag mistake based on string
	distances. The experiments obtained employing datasets of Twitter streams
	generated under controlled conditions  show that  Benford's law and Zipf's
	law can be used to model hashtag  frequency distribution.},
  url       = {http://www.aclweb.org/anthology/E17-4009}
}

