% Goutte and Leger (2016): workshop paper in the VarDial3 proceedings, pp. 178--184.
% The month uses the predefined `dec` macro so the bibliography style controls its rendering.
@InProceedings{goutte-leger:2016:VarDial3,
  author    = {Goutte, Cyril and L{\'e}ger, Serge},
  title     = {Advances in Ngram-based Discrimination of Similar Languages},
  booktitle = {Proceedings of the Third Workshop on {NLP} for Similar Languages, Varieties and Dialects ({VarDial3})},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {178--184},
  abstract  = {We describe the systems entered by the National Research Council in the 2016
    shared task on discriminating similar languages. Like previous years, we relied
    on character ngram features, and a mixture of discriminative and generative
    statistical classifiers. We mostly investigated the influence of the amount of
    data on the performance, in the open task, and compared the two-stage approach
    (predicting language/group, then variant) to a flat approach. Results suggest
    that ngrams are still state-of-the-art for language and variant identification,
    and that additional data has a small but decisive impact.},
  url       = {http://aclweb.org/anthology/W16-4823}
}

