% van der Lee and van den Bosch (2017), VarDial workshop paper on telling
% Netherlandic from Flemish Dutch text; ACL Anthology ID W17-1224.
% Month uses the predefined macro so styles can localise or abbreviate it;
% acronyms in booktitle are brace-protected against style recasing.
@InProceedings{vanderlee-vandenbosch:2017:VarDial,
  author    = {van der Lee, Chris and van den Bosch, Antal},
  title     = {Exploring Lexical and Syntactic Features for Language Variety Identification},
  booktitle = {Proceedings of the Fourth Workshop on {NLP} for Similar Languages, Varieties and Dialects ({VarDial})},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {190--199},
  abstract  = {We present a method to discriminate between texts written in either the
               Netherlandic or the Flemish variant of the Dutch language. The method draws on
               a feature bundle representing text statistics, syntactic features, and word
               $n$-grams. Text statistics include average word length and sentence length,
               while syntactic features include ratios of function words and part-of-speech
               $n$-grams.
               The effectiveness of the classifier was measured by classifying Dutch
               subtitles developed for either Dutch or Flemish television. Several machine
               learning algorithms were compared as well as feature combination methods in
               order to find the optimal generalization performance. A machine-learning meta
               classifier based on AdaBoost attained the best F-score of 0.92.},
  url       = {http://www.aclweb.org/anthology/W17-1224},
}

