% Conference paper (COLING 2016 technical papers). Acronyms are brace-protected
% against style recasing, the month uses the standard macro, and literal percent
% signs in the abstract are escaped so they survive being typeset.
@InProceedings{albadrashiny-diab:2016:COLING,
  author    = {Al-Badrashiny, Mohamed and Diab, Mona},
  title     = {{LILI}: A Simple Language Independent Approach for Language Identification},
  booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {1211--1219},
  abstract  = {We introduce a generic Language Independent Framework for Linguistic Code
               Switch Point Detection. The system uses characters level 5-grams and word level
               unigram language models to train a conditional random fields (CRF) model for
               classifying input words into various languages. We test our proposed framework
               and compare it to the state-of-the-art published systems on standard data sets
               from several language pairs: English-Spanish, Nepali-English, English-Hindi,
               Arabizi (Refers to Arabic written using the Latin/Roman script)-English,
               Arabic-Engari (Refers to English written using Arabic script), Modern Standard
               Arabic(MSA)-Egyptian, Levantine-MSA, Gulf-MSA, one more English-Spanish, and
               one more MSA-EGY. The overall weighted average F-score of each language pair
               are 96.4\%, 97.3\%, 98.0\%, 97.0\%, 98.9\%, 86.3\%, 88.2\%, 90.6\%, 95.2\%, and 85.0\%
               respectively. The results show that our approach despite its simplicity, either
               outperforms or performs at comparable levels to state-of-the-art published
               systems.},
  url       = {http://aclweb.org/anthology/C16-1115}
}

