% Workshop paper (VarDial3, held with COLING 2016). The month uses the standard
% three-letter macro so styles can format it; acronyms in booktitle are
% brace-protected against style recasing.
@InProceedings{barbaresi:2016:VarDial3,
  author    = {Barbaresi, Adrien},
  title     = {An Unsupervised Morphological Criterion for Discriminating Similar Languages},
  booktitle = {Proceedings of the Third Workshop on {NLP} for Similar Languages, Varieties and Dialects ({VarDial3})},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {212--220},
  abstract  = {In this study conducted on the occasion of the Discriminating between Similar
               Languages shared task, I introduce an additional decision factor focusing on
               the token and subtoken level. The motivation behind this submission is to test
               whether a morphologically-informed criterion can add linguistically relevant
               information to global categorization and thus improve performance. The
               contributions of this paper are (1) a description of the unsupervised,
               low-resource method; (2) an evaluation and analysis of its raw performance; and
               (3) an assessment of its impact within a model comprising common indicators
               used in language identification. I present and discuss the systems used in the
               task A, a 12-way language identification task comprising varieties of five main
               language groups. Additionally I introduce a new off-the-shelf Naive Bayes
               classifier using a contrastive word and subword n-gram model ("Bayesline")
               which outperforms the best submissions.},
  url       = {http://aclweb.org/anthology/W16-4827},
}

