@inproceedings{houbre-etal-2022-large,
title = "A Large-Scale Dataset for Biomedical Keyphrase Generation",
author = {Houbre, Ma{\"e}l and
Boudin, Florian and
Daille, Beatrice},
editor = "Lavelli, Alberto and
Holderness, Eben and
Jimeno Yepes, Antonio and
Minard, Anne-Lyse and
Pustejovsky, James and
Rinaldi, Fabio",
booktitle = "Proceedings of the 13th International Workshop on Health Text Mining and Information Analysis (LOUHI)",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.louhi-1.6",
doi = "10.18653/v1/2022.louhi-1.6",
pages = "47--53",
abstract = "Keyphrase generation is the task consisting in generating a set of words or phrases that highlight the main topics of a document. There are few datasets for keyphrase generation in the biomedical domain and they do not meet the expectations in terms of size for training generative models. In this paper, we introduce kp-biomed, the first large-scale biomedical keyphrase generation dataset collected from PubMed abstracts. We train and release several generative models and conduct a series of experiments showing that using large scale datasets improves significantly the performances for present and absent keyphrase generation. The dataset and models are available online.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="houbre-etal-2022-large">
<titleInfo>
<title>A Large-Scale Dataset for Biomedical Keyphrase Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maël</namePart>
<namePart type="family">Houbre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Florian</namePart>
<namePart type="family">Boudin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Beatrice</namePart>
<namePart type="family">Daille</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th International Workshop on Health Text Mining and Information Analysis (LOUHI)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Lavelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eben</namePart>
<namePart type="family">Holderness</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonio</namePart>
<namePart type="family">Jimeno Yepes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anne-Lyse</namePart>
<namePart type="family">Minard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Pustejovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabio</namePart>
<namePart type="family">Rinaldi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Keyphrase generation is the task consisting in generating a set of words or phrases that highlight the main topics of a document. There are few datasets for keyphrase generation in the biomedical domain and they do not meet the expectations in terms of size for training generative models. In this paper, we introduce kp-biomed, the first large-scale biomedical keyphrase generation dataset collected from PubMed abstracts. We train and release several generative models and conduct a series of experiments showing that using large scale datasets improves significantly the performances for present and absent keyphrase generation. The dataset and models are available online.</abstract>
<identifier type="citekey">houbre-etal-2022-large</identifier>
<identifier type="doi">10.18653/v1/2022.louhi-1.6</identifier>
<location>
<url>https://aclanthology.org/2022.louhi-1.6</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>47</start>
<end>53</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Large-Scale Dataset for Biomedical Keyphrase Generation
%A Houbre, Maël
%A Boudin, Florian
%A Daille, Beatrice
%Y Lavelli, Alberto
%Y Holderness, Eben
%Y Jimeno Yepes, Antonio
%Y Minard, Anne-Lyse
%Y Pustejovsky, James
%Y Rinaldi, Fabio
%S Proceedings of the 13th International Workshop on Health Text Mining and Information Analysis (LOUHI)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F houbre-etal-2022-large
%X Keyphrase generation is the task consisting in generating a set of words or phrases that highlight the main topics of a document. There are few datasets for keyphrase generation in the biomedical domain and they do not meet the expectations in terms of size for training generative models. In this paper, we introduce kp-biomed, the first large-scale biomedical keyphrase generation dataset collected from PubMed abstracts. We train and release several generative models and conduct a series of experiments showing that using large scale datasets improves significantly the performances for present and absent keyphrase generation. The dataset and models are available online.
%R 10.18653/v1/2022.louhi-1.6
%U https://aclanthology.org/2022.louhi-1.6
%U https://doi.org/10.18653/v1/2022.louhi-1.6
%P 47-53
Markdown (Informal)
[A Large-Scale Dataset for Biomedical Keyphrase Generation](https://aclanthology.org/2022.louhi-1.6) (Houbre et al., Louhi 2022)
ACL
- Maël Houbre, Florian Boudin, and Beatrice Daille. 2022. A Large-Scale Dataset for Biomedical Keyphrase Generation. In Proceedings of the 13th International Workshop on Health Text Mining and Information Analysis (LOUHI), pages 47–53, Abu Dhabi, United Arab Emirates (Hybrid). Association for Computational Linguistics.