% Conference paper (SIGdial 2018 proceedings). The month is given as the
% predefined BibTeX macro (unbraced) so the bibliography style decides how to
% print it, and the mixed-case acronym in booktitle is brace-protected so no
% style can recase it.
@InProceedings{ramanarayanan-pugh:2018:SIGdial,
  author    = {Ramanarayanan, Vikram  and  Pugh, Robert},
  title     = {Automatic Token and Turn Level Language Identification for Code-Switched Text Dialog: An Analysis Across Language Pairs and Corpora},
  booktitle = {Proceedings of the 19th Annual {SIGdial} Meeting on Discourse and Dialogue},
  month     = jul,
  year      = {2018},
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  pages     = {80--88},
  abstract  = {We examine the efficacy of various feature--learner combinations for language identification in different types of text-based code-switched interactions -- human-human dialog, human-machine dialog as well as monolog -- at both the token and turn levels. In order to examine the generalization of such methods across language pairs and datasets, we analyze 10 different datasets of code-switched text. We extract a variety of character- and word-based text features and pass them into multiple learners, including conditional random fields, logistic regressors and recurrent neural networks. We further examine the efficacy of novel character-level embedding and GloVe features in improving performance and observe that our best-performing text system significantly outperforms a majority vote baseline across language pairs and datasets.},
  url       = {http://www.aclweb.org/anthology/W18-5009}
}

