% Chesney et al. (2017), MWE 2017 workshop paper on multi-word entity
% classification. The percent sign in the abstract is escaped so that styles
% which typeset the abstract do not treat the rest of the line as a comment.
@InProceedings{chesney-EtAl:2017:MWE2017,
  author    = {Chesney, Sophie and Jacquet, Guillaume and Steinberger, Ralf and Piskorski, Jakub},
  title     = {Multi-word Entity Classification in a Highly Multilingual Environment},
  booktitle = {Proceedings of the 13th Workshop on Multiword Expressions ({MWE} 2017)},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {11--20},
  abstract  = {This paper describes an approach for the classification of millions of existing
               multi-word entities (MWEntities), such as organisation or event names, into
               thirteen category types, based only on the tokens they contain.
               In order to classify our very large in-house collection of multilingual
               MWEntities into an application-oriented set of entity categories, we trained
               and tested distantly-supervised classifiers in 43 languages based on MWEntities
               extracted from BabelNet. The best-performing classifier was the multi-class SVM
               using a TF.IDF-weighted data representation. Interestingly, one unique
               classifier trained on a mix of all languages consistently performed better than
               classifiers trained for individual languages, reaching an averaged F1-value of
               88.8\%. In this paper, we present the training and test data, including a human
               evaluation of its accuracy, describe the methods used to train the classifiers,
               and discuss the results.},
  url       = {http://www.aclweb.org/anthology/W17-1702},
}

