% Workshop paper: multiple-kernel learning for Arabic and German dialect identification.
% NOTE(review): `address` appears to hold the workshop location rather than the
% publisher's city -- confirm this matches the convention used by the rest of the file.
@InProceedings{ionescu-butnaru:2017:VarDial,
  author    = {Ionescu, Radu Tudor and Butnaru, Andrei},
  title     = {Learning to Identify {Arabic} and {German} Dialects using Multiple Kernels},
  booktitle = {Proceedings of the Fourth Workshop on {NLP} for Similar Languages, Varieties and Dialects ({VarDial})},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {200--209},
  abstract  = {We present a machine learning approach for the Arabic Dialect Identification
    (ADI) and the German Dialect Identification (GDI) Closed Shared Tasks of the
    DSL 2017 Challenge. The proposed approach combines several kernels using
    multiple kernel learning. While most of our kernels are based on character
    p-grams (also known as n-grams) extracted from speech transcripts, we also use
    a kernel based on i-vectors, a low-dimensional representation of audio
    recordings, provided only for the Arabic data. In the learning stage, we
    independently employ Kernel Discriminant Analysis (KDA) and Kernel Ridge
    Regression (KRR). Our approach is shallow and simple, but the empirical results
    obtained in the shared tasks prove that it achieves very good results. Indeed,
    we ranked on the first place in the ADI Shared Task with a weighted F1 score of
    76.32\% (4.62\% above the second place) and on the fifth place in the GDI Shared
    Task with a weighted F1 score of 63.67\% (2.57\% below the first place).},
  url       = {http://www.aclweb.org/anthology/W17-1225},
}

