% Conference paper from the ACL 2017 long-papers proceedings volume.
% Proper nouns in the title are brace-protected against style recasing;
% the percent sign in the abstract is escaped so it is safe to typeset.
@InProceedings{rijhwani-EtAl:2017:Long,
  author    = {Rijhwani, Shruti and Sequiera, Royal and Choudhury, Monojit and Bali, Kalika and Maddila, Chandra Shekhar},
  title     = {Estimating Code-Switching on {Twitter} with a Novel Generalized Word-Level Language Detection Technique},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {1971--1982},
  abstract  = {Word-level language detection is necessary for analyzing code-switched text,
    where multiple languages could be mixed within a sentence. Existing models are
    restricted to code-switching between two specific languages and fail in
    real-world scenarios as text input rarely has a priori information on the
    languages used. We present a novel unsupervised word-level language detection
    technique for code-switched text for an arbitrarily large number of languages,
    which does not require any manually annotated training data. Our experiments
    with tweets in seven languages show a 74\% relative error reduction in
    word-level labeling with respect to competitive baselines. We then use this
    system to conduct a large-scale quantitative analysis of code-switching
    patterns on Twitter, both global as well as region-specific, with 58M tweets.},
  url       = {http://aclweb.org/anthology/P17-1180}
}

