@inproceedings{basaldella-etal-2017-exploiting,
title = "Exploiting and Evaluating a Supervised, Multilanguage Keyphrase Extraction pipeline for under-resourced languages",
author = "Basaldella, Marco and
Helmy, Muhammad and
Antolli, Elisa and
Popescu, Mihai Horia and
Serra, Giuseppe and
Tasso, Carlo",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference Recent Advances in Natural Language Processing, {RANLP} 2017",
month = sep,
year = "2017",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd.",
url = "https://doi.org/10.26615/978-954-452-049-6_012",
doi = "10.26615/978-954-452-049-6_012",
pages = "78--85",
abstract = "This paper evaluates different techniques for building a supervised, multilanguage keyphrase extraction pipeline for languages which lack a gold standard. Starting from an unsupervised English keyphrase extraction pipeline, we implement pipelines for Arabic, Italian, Portuguese, and Romanian, and we build test collections for languages which lack one. Then, we add a Machine Learning module trained on a well-known English language corpus and we evaluate the performance not only over English but on the other languages as well. Finally, we repeat the same evaluation after training the pipeline over an Arabic language corpus to check whether using a language-specific corpus brings a further improvement in performance. On the five languages we analyzed, results show an improvement in performance when using a machine learning algorithm, even if such algorithm is not trained and tested on the same language.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="basaldella-etal-2017-exploiting">
<titleInfo>
<title>Exploiting and Evaluating a Supervised, Multilanguage Keyphrase Extraction pipeline for under-resourced languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Basaldella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Muhammad</namePart>
<namePart type="family">Helmy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elisa</namePart>
<namePart type="family">Antolli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mihai</namePart>
<namePart type="given">Horia</namePart>
<namePart type="family">Popescu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giuseppe</namePart>
<namePart type="family">Serra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carlo</namePart>
<namePart type="family">Tasso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper evaluates different techniques for building a supervised, multilanguage keyphrase extraction pipeline for languages which lack a gold standard. Starting from an unsupervised English keyphrase extraction pipeline, we implement pipelines for Arabic, Italian, Portuguese, and Romanian, and we build test collections for languages which lack one. Then, we add a Machine Learning module trained on a well-known English language corpus and we evaluate the performance not only over English but on the other languages as well. Finally, we repeat the same evaluation after training the pipeline over an Arabic language corpus to check whether using a language-specific corpus brings a further improvement in performance. On the five languages we analyzed, results show an improvement in performance when using a machine learning algorithm, even if such algorithm is not trained and tested on the same language.</abstract>
<identifier type="citekey">basaldella-etal-2017-exploiting</identifier>
<identifier type="doi">10.26615/978-954-452-049-6_012</identifier>
<part>
<date>2017-09</date>
<extent unit="page">
<start>78</start>
<end>85</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Exploiting and Evaluating a Supervised, Multilanguage Keyphrase Extraction pipeline for under-resourced languages
%A Basaldella, Marco
%A Helmy, Muhammad
%A Antolli, Elisa
%A Popescu, Mihai Horia
%A Serra, Giuseppe
%A Tasso, Carlo
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017
%D 2017
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F basaldella-etal-2017-exploiting
%X This paper evaluates different techniques for building a supervised, multilanguage keyphrase extraction pipeline for languages which lack a gold standard. Starting from an unsupervised English keyphrase extraction pipeline, we implement pipelines for Arabic, Italian, Portuguese, and Romanian, and we build test collections for languages which lack one. Then, we add a Machine Learning module trained on a well-known English language corpus and we evaluate the performance not only over English but on the other languages as well. Finally, we repeat the same evaluation after training the pipeline over an Arabic language corpus to check whether using a language-specific corpus brings a further improvement in performance. On the five languages we analyzed, results show an improvement in performance when using a machine learning algorithm, even if such algorithm is not trained and tested on the same language.
%R 10.26615/978-954-452-049-6_012
%U https://doi.org/10.26615/978-954-452-049-6_012
%P 78-85
Markdown (Informal)
[Exploiting and Evaluating a Supervised, Multilanguage Keyphrase Extraction pipeline for under-resourced languages](https://doi.org/10.26615/978-954-452-049-6_012) (Basaldella et al., RANLP 2017)
ACL
- Marco Basaldella, Muhammad Helmy, Elisa Antolli, Mihai Horia Popescu, Giuseppe Serra, and Carlo Tasso. 2017. Exploiting and Evaluating a Supervised, Multilanguage Keyphrase Extraction pipeline for under-resourced languages. In Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017, pages 78–85, Varna, Bulgaria. INCOMA Ltd..