@InProceedings{adouane-dobnik:2017:W17-13,
  author    = {Adouane, Wafia and Dobnik, Simon},
  title     = {Identification of Languages in {Algerian} {Arabic} Multilingual Documents},
  booktitle = {Proceedings of the Third Arabic Natural Language Processing Workshop},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {1--8},
  abstract  = {This paper presents a language identification system designed to detect the
	language of each word, in its context, in a multilingual documents as generated
	in social media by bilingual/multilingual communities, in our case speakers of
	Algerian Arabic. We frame the task as a sequence tagging problem and use
	supervised machine learning with standard methods like HMM and Ngram
	classification tagging. We also experiment with a lexicon-based method.
	Combining all the methods in a fall-back mechanism and introducing some
	linguistic rules, to deal with unseen tokens and ambiguous words, gives an
	overall accuracy of 93.14\%. Finally, we introduced rules for language
	identification from sequences of recognised words.},
  url       = {http://www.aclweb.org/anthology/W17-1301}
}

