% Workshop paper presenting word similarity datasets for six Indian languages
% (Proceedings of the 11th Linguistic Annotation Workshop, 2017).
% NOTE(review): the abstract is left exactly as exported; it is presumably
% quoted verbatim from the paper -- confirm before copy-editing its wording.
@InProceedings{akhtar-EtAl:2017:LAW,
  author    = {Akhtar, Syed Sarfaraz and Gupta, Arihant and Vajpayee, Avijit and Srivastava, Arjit and Shrivastava, Manish},
  title     = {Word Similarity Datasets for {Indian} Languages: Annotation and Baseline Systems},
  booktitle = {Proceedings of the 11th Linguistic Annotation Workshop},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {91--94},
  abstract  = {With the advent of word representations, word similarity tasks are becoming
	increasing popular as an evaluation metric for the quality of the
	representations. In this paper, we present manually annotated monolingual word
	similarity datasets of six Indian languages - Urdu, Telugu, Marathi, Punjabi,
	Tamil and Gujarati. These languages are most spoken Indian languages worldwide
	after Hindi and Bengali. For the construction of these datasets, our approach
	relies on translation and re-annotation of word similarity datasets of English.
	We also present baseline scores for word representation models using
	state-of-the-art techniques for Urdu, Telugu and Marathi by evaluating them on
	newly created word similarity datasets.},
  url       = {http://www.aclweb.org/anthology/W17-0811}
}

