% RANLP 2017 conference paper (Basaldella et al.) on supervised, multilanguage
% keyphrase extraction. The DOI is stored bare in `doi`; `url` keeps the same
% resolver link for styles that only understand `url`.
@InProceedings{basaldella-EtAl:2017:RANLP,
  author    = {Basaldella, Marco and Helmy, Muhammad and Antolli, Elisa and Popescu, Mihai Horia and Serra, Giuseppe and Tasso, Carlo},
  title     = {Exploiting and Evaluating a Supervised, Multilanguage Keyphrase Extraction pipeline for under-resourced languages},
  booktitle = {Proceedings of the International Conference Recent Advances in Natural Language Processing, {RANLP} 2017},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  pages     = {78--85},
  abstract  = {This paper evaluates different techniques for building a supervised,
    multilanguage keyphrase extraction pipeline for languages which lack a gold
    standard.
    Starting from an unsupervised English keyphrase extraction pipeline, we
    implement pipelines for Arabic, Italian, Portuguese, and Romanian, and we build
    test collections for languages which lack one.
    Then, we add a Machine Learning module trained on a well-known English language
    corpus and we evaluate the performance not only over English but on the other
    languages as well. Finally, we repeat the same evaluation after training the
    pipeline over an Arabic language corpus to check whether using a
    language-specific corpus brings a further improvement in performance. On the
    five languages we analyzed, results show an improvement in performance when
    using a machine learning algorithm, even if such algorithm is not trained and
    tested on the same language.},
  doi       = {10.26615/978-954-452-049-6_012},
  url       = {https://doi.org/10.26615/978-954-452-049-6_012}
}

