% EMNLP 2017 paper on ranking word-borrowing likeliness to improve language identification (ACL Anthology D17-1240).
@InProceedings{patro-EtAl:2017:EMNLP2017,
  author    = {Patro, Jasabanta and Samanta, Bidisha and Singh, Saurabh and Basu, Abhipsa and Mukherjee, Prithwish and Choudhury, Monojit and Mukherjee, Animesh},
  title     = {All that is {English} may be {Hindi}: Enhancing language identification through automatic ranking of the likeliness of word borrowing in social media},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {2264--2274},
  abstract  = {In this paper, we present a set of computational methods to identify the
    likeliness of a word being borrowed, based on the signals from social media. In
    terms of Spearman's correlation values, our methods perform more than two
    times better ($\sim$0.62) in predicting the borrowing likeliness compared to the
    best performing baseline ($\sim$0.26) reported in literature. Based on this
    likeliness estimate we asked annotators to re-annotate the language tags of
    foreign words in predominantly native contexts. In 88\% of cases the annotators
    felt that the foreign language tag should be replaced by native language tag,
    thus indicating a huge scope for improvement of automatic language
    identification systems.},
  url       = {https://www.aclweb.org/anthology/D17-1240},
}

