% Kocmi and Bojar, EACL 2017 long paper: LanideNN (language identification on text streams).
% NOTE(review): `address` appears to hold the conference venue rather than the
% publisher's city (usual for ACL Anthology exports) -- confirm before changing.
@InProceedings{kocmi-bojar:2017:EACLlong,
  author    = {Kocmi, Tom and Bojar, Ond{\v{r}}ej},
  title     = {{LanideNN}: Multilingual Language Identification on Text Stream},
  booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {927--936},
  abstract  = {In language identification, a common first step in natural
               language processing, we want to automatically determine the
               language of some input text. Monolingual language
               identification assumes that the given document is written in
               one language. In multilingual language identification, the
               document is usually in two or three languages and we just
               want their names. We aim one step further and propose a
               method for textual language identification where languages
               can change arbitrarily and the goal is to identify the spans
               of each of the languages. Our method is based on
               Bidirectional Recurrent Neural Networks and it performs well
               in monolingual and multilingual language identification tasks
               on six datasets covering 131 languages. The method keeps the
               accuracy also for short documents and across domains, so it
               is ideal for off-the-shelf use without preparation of
               training data.},
  url       = {http://www.aclweb.org/anthology/E17-1087},
}

