% Mandal & Nanmaran, W-NUT 2018 workshop paper (ACL Anthology ID W18-6107).
% Case-sensitive words in title/booktitle are brace-protected so that
% sentence-casing styles do not lowercase them; LaTeX specials are escaped.
@InProceedings{mandal-nanmaran:2018:W-NUT2018,
  author    = {Mandal, Soumil and Nanmaran, Karthick},
  title     = {Normalization of Transliterated Words in Code-Mixed Data Using {Seq2Seq} Model \& {Levenshtein} Distance},
  booktitle = {Proceedings of the 2018 {EMNLP} Workshop {W-NUT}: The 4th Workshop on Noisy User-generated Text},
  month     = nov,
  year      = {2018},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  pages     = {49--53},
  abstract  = {Building tools for code-mixed data is rapidly gaining popularity in the NLP research community as such data is exponentially rising on social media. Working with code-mixed data contains several challenges, especially due to grammatical inconsistencies and spelling variations in addition to all the previous known challenges for social media scenarios. In this article, we present a novel architecture focusing on normalizing phonetic typing variations, which is commonly seen in code-mixed data. One of the main features of our architecture is that in addition to normalizing, it can also be utilized for back-transliteration and word identification in some cases. Our model achieved an accuracy of 90.27\% on the test data.},
  doi       = {10.18653/v1/W18-6107},
  url       = {http://www.aclweb.org/anthology/W18-6107}
}

