@inproceedings{kang-shin-2024-improving-low,
title = "Improving Low-Resource Keyphrase Generation through Unsupervised Title Phrase Generation",
author = "Kang, Byungha and
Shin, Youhyun",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.775",
pages = "8853--8865",
abstract = "This paper introduces a novel approach called title phrase generation (TPG) for unsupervised keyphrase generation (UKG), leveraging a pseudo label generated from a document title. Previous UKG method extracts all phrases from a corpus to build a phrase bank, then draws candidate absent keyphrases related to a document from the phrase bank to generate a pseudo label. However, we observed that when separating the document title from the document body, a significant number of phrases absent from the document body are included in the title. Based on this observation, we propose an effective method for generating pseudo labels using phrases mined from the document title. We initially train BART using these pseudo labels (TPG) and then perform supervised fine-tuning on a small amount of human-annotated data, which we term low-resource fine-tuning (LRFT). Experimental results on five benchmark datasets demonstrate that our method outperforms existing low-resource keyphrase generation approaches even with fewer labeled data, showing strength in generating absent keyphrases. Moreover, our model trained solely with TPG, without any labeled data, surpasses previous UKG method, highlighting the effectiveness of utilizing titles over a phrase bank. The code is available at https://github.com/kangnlp/low-resource-kpgen-through-TPG.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kang-shin-2024-improving-low">
<titleInfo>
<title>Improving Low-Resource Keyphrase Generation through Unsupervised Title Phrase Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Byungha</namePart>
<namePart type="family">Kang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Youhyun</namePart>
<namePart type="family">Shin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper introduces a novel approach called title phrase generation (TPG) for unsupervised keyphrase generation (UKG), leveraging a pseudo label generated from a document title. Previous UKG method extracts all phrases from a corpus to build a phrase bank, then draws candidate absent keyphrases related to a document from the phrase bank to generate a pseudo label. However, we observed that when separating the document title from the document body, a significant number of phrases absent from the document body are included in the title. Based on this observation, we propose an effective method for generating pseudo labels using phrases mined from the document title. We initially train BART using these pseudo labels (TPG) and then perform supervised fine-tuning on a small amount of human-annotated data, which we term low-resource fine-tuning (LRFT). Experimental results on five benchmark datasets demonstrate that our method outperforms existing low-resource keyphrase generation approaches even with fewer labeled data, showing strength in generating absent keyphrases. Moreover, our model trained solely with TPG, without any labeled data, surpasses previous UKG method, highlighting the effectiveness of utilizing titles over a phrase bank. The code is available at https://github.com/kangnlp/low-resource-kpgen-through-TPG.</abstract>
<identifier type="citekey">kang-shin-2024-improving-low</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.775</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>8853</start>
<end>8865</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Improving Low-Resource Keyphrase Generation through Unsupervised Title Phrase Generation
%A Kang, Byungha
%A Shin, Youhyun
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F kang-shin-2024-improving-low
%X This paper introduces a novel approach called title phrase generation (TPG) for unsupervised keyphrase generation (UKG), leveraging a pseudo label generated from a document title. Previous UKG method extracts all phrases from a corpus to build a phrase bank, then draws candidate absent keyphrases related to a document from the phrase bank to generate a pseudo label. However, we observed that when separating the document title from the document body, a significant number of phrases absent from the document body are included in the title. Based on this observation, we propose an effective method for generating pseudo labels using phrases mined from the document title. We initially train BART using these pseudo labels (TPG) and then perform supervised fine-tuning on a small amount of human-annotated data, which we term low-resource fine-tuning (LRFT). Experimental results on five benchmark datasets demonstrate that our method outperforms existing low-resource keyphrase generation approaches even with fewer labeled data, showing strength in generating absent keyphrases. Moreover, our model trained solely with TPG, without any labeled data, surpasses previous UKG method, highlighting the effectiveness of utilizing titles over a phrase bank. The code is available at https://github.com/kangnlp/low-resource-kpgen-through-TPG.
%U https://aclanthology.org/2024.lrec-main.775
%P 8853-8865
Markdown (Informal)
[Improving Low-Resource Keyphrase Generation through Unsupervised Title Phrase Generation](https://aclanthology.org/2024.lrec-main.775) (Kang & Shin, LREC-COLING 2024)
ACL