@inproceedings{liu-etal-2024-mds,
    title = "{MDS}: A Fine-Grained Dataset for Multi-Modal Dialogue Summarization",
    author = "Liu, Zhipeng and
      Zhang, Xiaoming and
      Zhang, Litian and
      Yu, Zelong",
    editor = "Calzolari, Nicoletta and
      Kan, Min-Yen and
      Hoste, Veronique and
      Lenci, Alessandro and
      Sakti, Sakriani and
      Xue, Nianwen",
    booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
    month = may,
    year = "2024",
    address = "Torino, Italia",
    publisher = "ELRA and ICCL",
    url = "https://aclanthology.org/2024.lrec-main.970/",
    pages = "11123--11137",
abstract = "Due to the explosion of various dialogue scenes, summarizing the dialogue into a short message has drawn much attention recently. In the multi-modal dialogue scene, people tend to use tone and body language to illustrate their intentions. While traditional dialogue summarization has predominantly focused on textual content, this approach may overlook vital visual and audio information essential for understanding multi-modal interactions. Recognizing the established field of multi-modal dialogue summarization, we develop a new multi-modal dialogue summarization dataset (MDS), which aims to enhance the variety and scope of data available for this research area. MDS provides a demanding testbed for multi-modal dialogue summarization. Subsequently, we conducted a comparative analysis of various summarization techniques on MDS and found that the existing methods tend to produce redundant and incoherent summaries. All of the models generate unfaithful facts to some degree, suggesting future research directions. MDS is available at https://github.com/R00kkie/MDS."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="liu-etal-2024-mds">
    <titleInfo>
      <title>MDS: A Fine-Grained Dataset for Multi-Modal Dialogue Summarization</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Zhipeng</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xiaoming</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Litian</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zelong</namePart>
      <namePart type="family">Yu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Min-Yen</namePart>
        <namePart type="family">Kan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Veronique</namePart>
        <namePart type="family">Hoste</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sakriani</namePart>
        <namePart type="family">Sakti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nianwen</namePart>
        <namePart type="family">Xue</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>ELRA and ICCL</publisher>
        <place>
          <placeTerm type="text">Torino, Italia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>With the recent explosion of dialogue scenarios, summarizing a dialogue into a short message has drawn much attention. In multi-modal dialogue, people use tone and body language to convey their intentions, yet traditional dialogue summarization has focused predominantly on textual content, an approach that overlooks visual and audio information essential for understanding multi-modal interactions. To enrich the field of multi-modal dialogue summarization, we develop a new multi-modal dialogue summarization dataset (MDS), which expands the variety and scope of data available for this research area and provides a demanding testbed. A comparative analysis of existing summarization techniques on MDS shows that they tend to produce redundant and incoherent summaries, and that all models generate unfaithful facts to some degree, suggesting directions for future research. MDS is available at https://github.com/R00kkie/MDS.</abstract>
<identifier type="citekey">liu-etal-2024-mds</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.970/</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>11123</start>
<end>11137</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MDS: A Fine-Grained Dataset for Multi-Modal Dialogue Summarization
%A Liu, Zhipeng
%A Zhang, Xiaoming
%A Zhang, Litian
%A Yu, Zelong
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F liu-etal-2024-mds
%X With the recent explosion of dialogue scenarios, summarizing a dialogue into a short message has drawn much attention. In multi-modal dialogue, people use tone and body language to convey their intentions, yet traditional dialogue summarization has focused predominantly on textual content, an approach that overlooks visual and audio information essential for understanding multi-modal interactions. To enrich the field of multi-modal dialogue summarization, we develop a new multi-modal dialogue summarization dataset (MDS), which expands the variety and scope of data available for this research area and provides a demanding testbed. A comparative analysis of existing summarization techniques on MDS shows that they tend to produce redundant and incoherent summaries, and that all models generate unfaithful facts to some degree, suggesting directions for future research. MDS is available at https://github.com/R00kkie/MDS.
%U https://aclanthology.org/2024.lrec-main.970/
%P 11123-11137
Markdown (Informal)
[MDS: A Fine-Grained Dataset for Multi-Modal Dialogue Summarization](https://aclanthology.org/2024.lrec-main.970/) (Liu et al., LREC-COLING 2024)
ACL
Zhipeng Liu, Xiaoming Zhang, Litian Zhang, and Zelong Yu. 2024. [MDS: A Fine-Grained Dataset for Multi-Modal Dialogue Summarization](https://aclanthology.org/2024.lrec-main.970/). In *Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)*, pages 11123–11137, Torino, Italia. ELRA and ICCL.