@inproceedings{steindl-etal-2023-controlled,
title = "Controlled Data Augmentation for Training Task-Oriented Dialog Systems with Low Resource Data",
author = {Steindl, Sebastian and
Sch{\"a}fer, Ulrich and
Ludwig, Bernd},
editor = "Surdeanu, Mihai and
Riloff, Ellen and
Chiticariu, Laura and
Frietag, Dayne and
Hahn-Powell, Gus and
Morrison, Clayton T. and
Noriega-Atala, Enrique and
Sharp, Rebecca and
Valenzuela-Escarcega, Marco",
booktitle = "Proceedings of the 2nd Workshop on Pattern-based Approaches to NLP in the Age of Deep Learning",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.pandl-1.9",
doi = "10.18653/v1/2023.pandl-1.9",
pages = "92--102",
abstract = "Modern dialog systems rely on Deep Learning to train transformer-based model architectures. These notoriously rely on large amounts of training data. However, the collection of conversational data is often a tedious and costly process. This is especially true for Task-Oriented Dialogs, where the system ought to help the user achieve specific tasks, such as making reservations. We investigate a controlled strategy for dialog synthesis. Our method generates utterances based on dialog annotations in a sequence-to-sequence manner. Besides exploring the viability of the approach itself, we also explore the effect of constrained beam search on the generation capabilities. Moreover, we analyze the effectiveness of the proposed method as a data augmentation by studying the impact the synthetic dialogs have on training dialog systems. We perform the experiments in multiple settings, simulating various amounts of ground-truth data. Our work shows that a controlled generation approach is a viable method to synthesize Task-Oriented Dialogs, that can in turn be used to train dialog systems. We were able to improve this process by utilizing constrained beam search.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="steindl-etal-2023-controlled">
<titleInfo>
<title>Controlled Data Augmentation for Training Task-Oriented Dialog Systems with Low Resource Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Steindl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ulrich</namePart>
<namePart type="family">Schäfer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bernd</namePart>
<namePart type="family">Ludwig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Pattern-based Approaches to NLP in the Age of Deep Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mihai</namePart>
<namePart type="family">Surdeanu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ellen</namePart>
<namePart type="family">Riloff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="family">Chiticariu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dayne</namePart>
<namePart type="family">Frietag</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gus</namePart>
<namePart type="family">Hahn-Powell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Clayton</namePart>
<namePart type="given">T</namePart>
<namePart type="family">Morrison</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enrique</namePart>
<namePart type="family">Noriega-Atala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rebecca</namePart>
<namePart type="family">Sharp</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Valenzuela-Escarcega</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Modern dialog systems rely on Deep Learning to train transformer-based model architectures. These notoriously rely on large amounts of training data. However, the collection of conversational data is often a tedious and costly process. This is especially true for Task-Oriented Dialogs, where the system ought to help the user achieve specific tasks, such as making reservations. We investigate a controlled strategy for dialog synthesis. Our method generates utterances based on dialog annotations in a sequence-to-sequence manner. Besides exploring the viability of the approach itself, we also explore the effect of constrained beam search on the generation capabilities. Moreover, we analyze the effectiveness of the proposed method as a data augmentation by studying the impact the synthetic dialogs have on training dialog systems. We perform the experiments in multiple settings, simulating various amounts of ground-truth data. Our work shows that a controlled generation approach is a viable method to synthesize Task-Oriented Dialogs, that can in turn be used to train dialog systems. We were able to improve this process by utilizing constrained beam search.</abstract>
<identifier type="citekey">steindl-etal-2023-controlled</identifier>
<identifier type="doi">10.18653/v1/2023.pandl-1.9</identifier>
<location>
<url>https://aclanthology.org/2023.pandl-1.9</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>92</start>
<end>102</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Controlled Data Augmentation for Training Task-Oriented Dialog Systems with Low Resource Data
%A Steindl, Sebastian
%A Schäfer, Ulrich
%A Ludwig, Bernd
%Y Surdeanu, Mihai
%Y Riloff, Ellen
%Y Chiticariu, Laura
%Y Frietag, Dayne
%Y Hahn-Powell, Gus
%Y Morrison, Clayton T.
%Y Noriega-Atala, Enrique
%Y Sharp, Rebecca
%Y Valenzuela-Escarcega, Marco
%S Proceedings of the 2nd Workshop on Pattern-based Approaches to NLP in the Age of Deep Learning
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F steindl-etal-2023-controlled
%X Modern dialog systems rely on Deep Learning to train transformer-based model architectures. These notoriously rely on large amounts of training data. However, the collection of conversational data is often a tedious and costly process. This is especially true for Task-Oriented Dialogs, where the system ought to help the user achieve specific tasks, such as making reservations. We investigate a controlled strategy for dialog synthesis. Our method generates utterances based on dialog annotations in a sequence-to-sequence manner. Besides exploring the viability of the approach itself, we also explore the effect of constrained beam search on the generation capabilities. Moreover, we analyze the effectiveness of the proposed method as a data augmentation by studying the impact the synthetic dialogs have on training dialog systems. We perform the experiments in multiple settings, simulating various amounts of ground-truth data. Our work shows that a controlled generation approach is a viable method to synthesize Task-Oriented Dialogs, that can in turn be used to train dialog systems. We were able to improve this process by utilizing constrained beam search.
%R 10.18653/v1/2023.pandl-1.9
%U https://aclanthology.org/2023.pandl-1.9
%U https://doi.org/10.18653/v1/2023.pandl-1.9
%P 92-102
Markdown (Informal)
[Controlled Data Augmentation for Training Task-Oriented Dialog Systems with Low Resource Data](https://aclanthology.org/2023.pandl-1.9) (Steindl et al., PANDL-WS 2023)
ACL