@InProceedings{chandu-EtAl:2018:W18-322,
  author    = {Chandu, Khyathi  and  Manzini, Thomas  and  Singh, Sumeet  and  Black, Alan W.},
  title     = {Language Informed Modeling of Code-Switched Text},
  booktitle = {Proceedings of the Third Workshop on Computational Approaches to Linguistic Code-Switching},
  month     = {July},
  year      = {2018},
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  pages     = {92--97},
  abstract  = {Code-switching (CS), the practice of alternating between two or more languages in conversation, is pervasive in most multilingual communities. CS texts exhibit a complex interplay between languages and occur in informal contexts, which makes them harder to collect and to build NLP tools for. We approach this problem through Language Modeling (LM) on a new Hindi-English mixed corpus of 59,189 unique sentences collected from blogging websites. We implement and discuss several language models derived from a multi-layered LSTM architecture. We hypothesize that encoding language information strengthens a language model by helping it learn code-switching points. Our best-performing model achieves a test perplexity of 19.52 on the CS corpus we collected and processed. On this data, we demonstrate an improvement over the AWD-LSTM LM (a recent state of the art for monolingual English).},
  url       = {http://www.aclweb.org/anthology/W18-3211}
}