@inproceedings{occhipinti-etal-2024-prodigy,
title = "{PRODIG}y: a {PRO}file-based {DI}alogue Generation dataset",
author = "Occhipinti, Daniela and
Tekiro{\u{g}}lu, Serra Sinem and
Guerini, Marco",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2024",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-naacl.222",
doi = "10.18653/v1/2024.findings-naacl.222",
pages = "3500--3514",
abstract = "Providing dialogue agents with a profile representation can improve their consistency and coherence, leading to better conversations. However, current profile-based dialogue datasets for training such agents contain either explicit profile representations that are simple and dialogue-specific, or implicit representations that are difficult to collect. In this work, we introduce the PRODIGy (PROfile-based DIalogue Generation) dataset, which brings diverse representations together, providing a more comprehensive profile dimension set for each speaker. This resource comprises more than 20k dialogues, sourced from movie scripts, aligned with speaker representations such as communication style, biography, personality and gender. Initial experiments with diverse baselines show that providing generative language models with these aspects of a profile, both separately and jointly, enhances models{'} performance. This improvement holds true in both in-domain and cross-domain settings, for both fine-tuned and instruction-based LLMs.",
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="occhipinti-etal-2024-prodigy">
    <titleInfo>
      <title>PRODIGy: a PROfile-based DIalogue Generation dataset</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Daniela</namePart>
      <namePart type="family">Occhipinti</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Serra</namePart>
      <namePart type="given">Sinem</namePart>
      <namePart type="family">Tekiroğlu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marco</namePart>
      <namePart type="family">Guerini</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: NAACL 2024</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Kevin</namePart>
        <namePart type="family">Duh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Helena</namePart>
        <namePart type="family">Gomez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Steven</namePart>
        <namePart type="family">Bethard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Mexico City, Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Providing dialogue agents with a profile representation can improve their consistency and coherence, leading to better conversations. However, current profile-based dialogue datasets for training such agents contain either explicit profile representations that are simple and dialogue-specific, or implicit representations that are difficult to collect. In this work, we introduce the PRODIGy (PROfile-based DIalogue Generation) dataset, which brings diverse representations together, providing a more comprehensive profile dimension set for each speaker. This resource comprises more than 20k dialogues, sourced from movie scripts, aligned with speaker representations such as communication style, biography, personality and gender. Initial experiments with diverse baselines show that providing generative language models with these aspects of a profile, both separately and jointly, enhances models’ performance. This improvement holds true in both in-domain and cross-domain settings, for both fine-tuned and instruction-based LLMs.</abstract>
    <identifier type="citekey">occhipinti-etal-2024-prodigy</identifier>
    <identifier type="doi">10.18653/v1/2024.findings-naacl.222</identifier>
    <location>
      <url>https://aclanthology.org/2024.findings-naacl.222</url>
    </location>
    <part>
      <date>2024-06</date>
      <extent unit="page">
        <start>3500</start>
        <end>3514</end>
      </extent>
    </part>
  </mods>
</modsCollection>

%0 Conference Proceedings
%T PRODIGy: a PROfile-based DIalogue Generation dataset
%A Occhipinti, Daniela
%A Tekiroğlu, Serra Sinem
%A Guerini, Marco
%Y Duh, Kevin
%Y Gomez, Helena
%Y Bethard, Steven
%S Findings of the Association for Computational Linguistics: NAACL 2024
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F occhipinti-etal-2024-prodigy
%X Providing dialogue agents with a profile representation can improve their consistency and coherence, leading to better conversations. However, current profile-based dialogue datasets for training such agents contain either explicit profile representations that are simple and dialogue-specific, or implicit representations that are difficult to collect. In this work, we introduce the PRODIGy (PROfile-based DIalogue Generation) dataset, which brings diverse representations together, providing a more comprehensive profile dimension set for each speaker. This resource comprises more than 20k dialogues, sourced from movie scripts, aligned with speaker representations such as communication style, biography, personality and gender. Initial experiments with diverse baselines show that providing generative language models with these aspects of a profile, both separately and jointly, enhances models’ performance. This improvement holds true in both in-domain and cross-domain settings, for both fine-tuned and instruction-based LLMs.
%R 10.18653/v1/2024.findings-naacl.222
%U https://aclanthology.org/2024.findings-naacl.222
%U https://doi.org/10.18653/v1/2024.findings-naacl.222
%P 3500-3514

Markdown (Informal):
[PRODIGy: a PROfile-based DIalogue Generation dataset](https://aclanthology.org/2024.findings-naacl.222) (Occhipinti et al., Findings 2024)

ACL:
Daniela Occhipinti, Serra Sinem Tekiroğlu, and Marco Guerini. 2024. PRODIGy: a PROfile-based DIalogue Generation dataset. In Findings of the Association for Computational Linguistics: NAACL 2024, pages 3500–3514, Mexico City, Mexico. Association for Computational Linguistics.