% Workshop paper by Adrien Barbaresi in the VarDial 2017 proceedings (ACL).
% The month uses the predefined BibTeX macro so that styles can localise or
% abbreviate it; acronyms in the booktitle are braced so that a style which
% recases the field keeps their capitalisation.
% NOTE(review): address presumably records the workshop location rather than
% the publisher's city; confirm against the convention used in this file.
@InProceedings{barbaresi:2017:VarDial,
  author    = {Barbaresi, Adrien},
  title     = {Discriminating between Similar Languages using Weighted Subword Features},
  booktitle = {Proceedings of the Fourth Workshop on {NLP} for Similar Languages, Varieties and Dialects ({VarDial})},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {184--189},
  abstract  = {The present contribution revolves around a contrastive subword n-gram model
	which has been tested in the Discriminating between Similar Languages shared
	task. I present and discuss the method used in this 14-way language
	identification task comprising varieties of 6 main language groups. It features
	the following characteristics: (1) the preprocessing and conversion of a
	collection of documents to sparse features; (2) weighted character n-gram
	profiles; (3) a multinomial Bayesian classifier. Meaningful bag-of-n-grams
	features can be used as a system in a straightforward way, my approach
	outperforms most of the systems used in the DSL shared task (3rd rank).},
  url       = {http://www.aclweb.org/anthology/W17-1223}
}