@inproceedings{lin-etal-2022-knowledge,
title = "Knowledge-Grounded Conversational Data Augmentation with Generative Conversational Networks",
author = "Lin, Yen Ting and
Papangelis, Alexandros and
Kim, Seokhwan and
Hakkani-Tur, Dilek",
editor = "Lemon, Oliver and
Hakkani-Tur, Dilek and
Li, Junyi Jessy and
Ashrafzadeh, Arash and
Garcia, Daniel Hern{\'a}ndez and
Alikhani, Malihe and
Vandyke, David and
Du{\v{s}}ek, Ond{\v{r}}ej",
booktitle = "Proceedings of the 23rd Annual Meeting of the Special Interest Group on Discourse and Dialogue",
month = sep,
year = "2022",
address = "Edinburgh, UK",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.sigdial-1.3",
doi = "10.18653/v1/2022.sigdial-1.3",
pages = "26--38",
abstract = "While rich, open-domain textual data are generally available and may include interesting phenomena (humor, sarcasm, empathy, etc.) most are designed for language processing tasks, and are usually in a non-conversational format. In this work, we take a step towards automatically generating conversational data using Generative Conversational Networks, aiming to benefit from the breadth of available language and knowledge data, and train open domain social conversational agents. We evaluate our approach on conversations with and without knowledge on the Topical Chat dataset using automatic metrics and human evaluators. Our results show that for conversations without knowledge grounding, GCN can generalize from the seed data, producing novel conversations that are less relevant but more engaging and for knowledge-grounded conversations, it can produce more knowledge-focused, fluent, and engaging conversations. Specifically, we show that for open-domain conversations with 10{\%} of seed data, our approach performs close to the baseline that uses 100{\%} of the data, while for knowledge-grounded conversations, it achieves the same using only 1{\%} of the data, on human ratings of engagingness, fluency, and relevance.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lin-etal-2022-knowledge">
<titleInfo>
<title>Knowledge-Grounded Conversational Data Augmentation with Generative Conversational Networks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yen</namePart>
<namePart type="given">Ting</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandros</namePart>
<namePart type="family">Papangelis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seokhwan</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dilek</namePart>
<namePart type="family">Hakkani-Tur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Annual Meeting of the Special Interest Group on Discourse and Dialogue</title>
</titleInfo>
<name type="personal">
<namePart type="given">Oliver</namePart>
<namePart type="family">Lemon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dilek</namePart>
<namePart type="family">Hakkani-Tur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junyi</namePart>
<namePart type="given">Jessy</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arash</namePart>
<namePart type="family">Ashrafzadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="given">Hernández</namePart>
<namePart type="family">Garcia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malihe</namePart>
<namePart type="family">Alikhani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Vandyke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ondřej</namePart>
<namePart type="family">Dušek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Edinburgh, UK</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>While rich, open-domain textual data are generally available and may include interesting phenomena (humor, sarcasm, empathy, etc.) most are designed for language processing tasks, and are usually in a non-conversational format. In this work, we take a step towards automatically generating conversational data using Generative Conversational Networks, aiming to benefit from the breadth of available language and knowledge data, and train open domain social conversational agents. We evaluate our approach on conversations with and without knowledge on the Topical Chat dataset using automatic metrics and human evaluators. Our results show that for conversations without knowledge grounding, GCN can generalize from the seed data, producing novel conversations that are less relevant but more engaging and for knowledge-grounded conversations, it can produce more knowledge-focused, fluent, and engaging conversations. Specifically, we show that for open-domain conversations with 10% of seed data, our approach performs close to the baseline that uses 100% of the data, while for knowledge-grounded conversations, it achieves the same using only 1% of the data, on human ratings of engagingness, fluency, and relevance.</abstract>
<identifier type="citekey">lin-etal-2022-knowledge</identifier>
<identifier type="doi">10.18653/v1/2022.sigdial-1.3</identifier>
<location>
<url>https://aclanthology.org/2022.sigdial-1.3</url>
</location>
<part>
<date>2022-09</date>
<extent unit="page">
<start>26</start>
<end>38</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Knowledge-Grounded Conversational Data Augmentation with Generative Conversational Networks
%A Lin, Yen Ting
%A Papangelis, Alexandros
%A Kim, Seokhwan
%A Hakkani-Tur, Dilek
%Y Lemon, Oliver
%Y Hakkani-Tur, Dilek
%Y Li, Junyi Jessy
%Y Ashrafzadeh, Arash
%Y Garcia, Daniel Hernández
%Y Alikhani, Malihe
%Y Vandyke, David
%Y Dušek, Ondřej
%S Proceedings of the 23rd Annual Meeting of the Special Interest Group on Discourse and Dialogue
%D 2022
%8 September
%I Association for Computational Linguistics
%C Edinburgh, UK
%F lin-etal-2022-knowledge
%X While rich, open-domain textual data are generally available and may include interesting phenomena (humor, sarcasm, empathy, etc.) most are designed for language processing tasks, and are usually in a non-conversational format. In this work, we take a step towards automatically generating conversational data using Generative Conversational Networks, aiming to benefit from the breadth of available language and knowledge data, and train open domain social conversational agents. We evaluate our approach on conversations with and without knowledge on the Topical Chat dataset using automatic metrics and human evaluators. Our results show that for conversations without knowledge grounding, GCN can generalize from the seed data, producing novel conversations that are less relevant but more engaging and for knowledge-grounded conversations, it can produce more knowledge-focused, fluent, and engaging conversations. Specifically, we show that for open-domain conversations with 10% of seed data, our approach performs close to the baseline that uses 100% of the data, while for knowledge-grounded conversations, it achieves the same using only 1% of the data, on human ratings of engagingness, fluency, and relevance.
%R 10.18653/v1/2022.sigdial-1.3
%U https://aclanthology.org/2022.sigdial-1.3
%U https://doi.org/10.18653/v1/2022.sigdial-1.3
%P 26-38
Markdown (Informal)
[Knowledge-Grounded Conversational Data Augmentation with Generative Conversational Networks](https://aclanthology.org/2022.sigdial-1.3) (Lin et al., SIGDIAL 2022)
ACL