% UniBuc-NLP team's system description for the DSL 2016 shared task,
% published in the VarDial3 workshop proceedings.
@InProceedings{nisioi-ciobanu-dinu:2016:VarDial3,
  author    = {Nisioi, Sergiu and Ciobanu, Alina Maria and Dinu, Liviu P.},
  title     = {Vanilla Classifiers for Distinguishing between Similar Languages},
  booktitle = {Proceedings of the Third Workshop on {NLP} for Similar Languages, Varieties and Dialects ({VarDial3})},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {235--242},
  abstract  = {In this paper we describe the submission of the UniBuc-NLP team for the
	Discriminating between Similar Languages Shared Task, DSL 2016. We present and
	analyze the results we obtained in the closed track of sub-task 1
	(Similar languages and language varieties) and sub-task 2
	(Arabic dialects). For sub-task 1 we used a logistic regression
	classifier with tf-idf feature weighting and for sub-task 2 a character-based
	string kernel with an SVM classifier. Our results show that good accuracy
	scores can be obtained with limited feature and model engineering. While
	certain limitations are to be acknowledged, our approach worked surprisingly
	well for out-of-domain, social media data, with 0.898 accuracy (3rd place) for
	dataset B1 and 0.838 accuracy (4th place) for dataset B2.},
  url       = {http://aclweb.org/anthology/W16-4830}
}

