@inproceedings{park-etal-2024-gendex,
title = "{GENDEX}: Generative Data Augmentation Strategy Leveraging External Data for Abstractive Dialogue Summarization",
author = "Park, Sangwon and
Choi, Hongseok and
Choi, Dongha and
Lee, Hyunju",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2024",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-acl.188",
doi = "10.18653/v1/2024.findings-acl.188",
pages = "3171--3185",
abstract = "With the proliferation of digital communication, dialogue summarization has become increasingly important. However, it still faces a shortage of data. To address this issue, we developed **Gen**erative **D**ata Augmentation Strategy Leveraging **Ex**ternal Data for Abstractive Dialogue Summarization (**GENDEX**), which is based on the hypothetical foundation that texts containing people and their interpersonal interactions can potentially serve as summaries of corresponding dialogues. We filter short texts containing people and resolve coreferences for better contextual analysis. We then identify the semantic roles of words within the texts and filter them based on the patterns observed in the dialogue summarization datasets. Using these texts, we generate synthetic dialogues through a controlled generation method. To better leverage the augmented data, we utilize noise-tolerant training to fine-tune the summarization model. The experimental results demonstrate the effectiveness of our proposed method, showing its robust performance, generalizability, and scalability. Moreover, performance improvements by *GENDEX* were observed regardless of complexity of dialogues. The code is available at https://github.com/DMCB-GIST/GENDEX.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="park-etal-2024-gendex">
<titleInfo>
<title>GENDEX: Generative Data Augmentation Strategy Leveraging External Data for Abstractive Dialogue Summarization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sangwon</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hongseok</namePart>
<namePart type="family">Choi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dongha</namePart>
<namePart type="family">Choi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hyunju</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>With the proliferation of digital communication, dialogue summarization has become increasingly important. However, it still faces a shortage of data. To address this issue, we developed **Gen**erative **D**ata Augmentation Strategy Leveraging **Ex**ternal Data for Abstractive Dialogue Summarization (**GENDEX**), which is based on the hypothetical foundation that texts containing people and their interpersonal interactions can potentially serve as summaries of corresponding dialogues. We filter short texts containing people and resolve coreferences for better contextual analysis. We then identify the semantic roles of words within the texts and filter them based on the patterns observed in the dialogue summarization datasets. Using these texts, we generate synthetic dialogues through a controlled generation method. To better leverage the augmented data, we utilize noise-tolerant training to fine-tune the summarization model. The experimental results demonstrate the effectiveness of our proposed method, showing its robust performance, generalizability, and scalability. Moreover, performance improvements by *GENDEX* were observed regardless of complexity of dialogues. The code is available at https://github.com/DMCB-GIST/GENDEX.</abstract>
<identifier type="citekey">park-etal-2024-gendex</identifier>
<identifier type="doi">10.18653/v1/2024.findings-acl.188</identifier>
<location>
<url>https://aclanthology.org/2024.findings-acl.188</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>3171</start>
<end>3185</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GENDEX: Generative Data Augmentation Strategy Leveraging External Data for Abstractive Dialogue Summarization
%A Park, Sangwon
%A Choi, Hongseok
%A Choi, Dongha
%A Lee, Hyunju
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Findings of the Association for Computational Linguistics: ACL 2024
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F park-etal-2024-gendex
%X With the proliferation of digital communication, dialogue summarization has become increasingly important. However, it still faces a shortage of data. To address this issue, we developed **Gen**erative **D**ata Augmentation Strategy Leveraging **Ex**ternal Data for Abstractive Dialogue Summarization (**GENDEX**), which is based on the hypothetical foundation that texts containing people and their interpersonal interactions can potentially serve as summaries of corresponding dialogues. We filter short texts containing people and resolve coreferences for better contextual analysis. We then identify the semantic roles of words within the texts and filter them based on the patterns observed in the dialogue summarization datasets. Using these texts, we generate synthetic dialogues through a controlled generation method. To better leverage the augmented data, we utilize noise-tolerant training to fine-tune the summarization model. The experimental results demonstrate the effectiveness of our proposed method, showing its robust performance, generalizability, and scalability. Moreover, performance improvements by *GENDEX* were observed regardless of complexity of dialogues. The code is available at https://github.com/DMCB-GIST/GENDEX.
%R 10.18653/v1/2024.findings-acl.188
%U https://aclanthology.org/2024.findings-acl.188
%U https://doi.org/10.18653/v1/2024.findings-acl.188
%P 3171-3185
Markdown (Informal)
[GENDEX: Generative Data Augmentation Strategy Leveraging External Data for Abstractive Dialogue Summarization](https://aclanthology.org/2024.findings-acl.188) (Park et al., Findings 2024)
ACL