@inproceedings{wolhandler-etal-2022-multi,
title = "How {``}Multi{''} is Multi-Document Summarization?",
author = "Wolhandler, Ruben and
Cattan, Arie and
Ernst, Ori and
Dagan, Ido",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.emnlp-main.389",
doi = "10.18653/v1/2022.emnlp-main.389",
pages = "5761--5769",
abstract = "The task of multi-document summarization (MDS) aims at models that, given multiple documents as input, are able to generate a summary that combines disperse information, originally spread {\_}{\_}across{\_}{\_} these documents. Accordingly, it is expected that both reference summaries in MDS datasets, as well as system summaries, would indeed be based on such dispersed information. In this paper, we argue for quantifying and assessing this expectation. To that end, we propose an automated measure for evaluating the degree to which a summary is {``}disperse{''}, in the sense of the number of source documents needed to cover its content. We apply our measure to empirically analyze several popular MDS datasets, with respect to their reference summaries, as well as the output of state-of-the-art systems. Our results show that certain MDS datasets barely require combining information from multiple documents, where a single document often covers the full summary content. Overall, we advocate using our metric for assessing and improving the degree to which summarization datasets require combining multi-document information, and similarly how summarization models actually meet this challenge.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wolhandler-etal-2022-multi">
<titleInfo>
<title>How “Multi” is Multi-Document Summarization?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruben</namePart>
<namePart type="family">Wolhandler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arie</namePart>
<namePart type="family">Cattan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ori</namePart>
<namePart type="family">Ernst</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ido</namePart>
<namePart type="family">Dagan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
    <abstract>The task of multi-document summarization (MDS) aims at models that, given multiple documents as input, are able to generate a summary that combines dispersed information, originally spread across these documents. Accordingly, it is expected that both reference summaries in MDS datasets and system summaries would indeed be based on such dispersed information. In this paper, we argue for quantifying and assessing this expectation. To that end, we propose an automated measure for evaluating the degree to which a summary is “dispersed”, in the sense of the number of source documents needed to cover its content. We apply our measure to empirically analyze several popular MDS datasets, with respect to their reference summaries, as well as the output of state-of-the-art systems. Our results show that certain MDS datasets barely require combining information from multiple documents, as a single document often covers the full summary content. Overall, we advocate using our metric for assessing and improving the degree to which summarization datasets require combining multi-document information, and, similarly, the degree to which summarization models actually meet this challenge.</abstract>
<identifier type="citekey">wolhandler-etal-2022-multi</identifier>
<identifier type="doi">10.18653/v1/2022.emnlp-main.389</identifier>
<location>
<url>https://aclanthology.org/2022.emnlp-main.389</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>5761</start>
<end>5769</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T How “Multi” is Multi-Document Summarization?
%A Wolhandler, Ruben
%A Cattan, Arie
%A Ernst, Ori
%A Dagan, Ido
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F wolhandler-etal-2022-multi
%X The task of multi-document summarization (MDS) aims at models that, given multiple documents as input, are able to generate a summary that combines dispersed information, originally spread across these documents. Accordingly, it is expected that both reference summaries in MDS datasets and system summaries would indeed be based on such dispersed information. In this paper, we argue for quantifying and assessing this expectation. To that end, we propose an automated measure for evaluating the degree to which a summary is “dispersed”, in the sense of the number of source documents needed to cover its content. We apply our measure to empirically analyze several popular MDS datasets, with respect to their reference summaries, as well as the output of state-of-the-art systems. Our results show that certain MDS datasets barely require combining information from multiple documents, as a single document often covers the full summary content. Overall, we advocate using our metric for assessing and improving the degree to which summarization datasets require combining multi-document information, and, similarly, the degree to which summarization models actually meet this challenge.
%R 10.18653/v1/2022.emnlp-main.389
%U https://aclanthology.org/2022.emnlp-main.389
%U https://doi.org/10.18653/v1/2022.emnlp-main.389
%P 5761-5769
Markdown (Informal)
[How “Multi” is Multi-Document Summarization?](https://aclanthology.org/2022.emnlp-main.389) (Wolhandler et al., EMNLP 2022)
ACL
Ruben Wolhandler, Arie Cattan, Ori Ernst, and Ido Dagan. 2022. How “Multi” is Multi-Document Summarization? In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pages 5761–5769, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.
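For intuition about the measure described in the abstract above (the number of source documents needed to cover a summary's content), here is a minimal illustrative sketch in Python. It is not the paper's implementation: the unigram-recall coverage proxy, the greedy document-selection loop, the function names, and the 0.8 coverage threshold are all assumptions made purely for illustration.

```python
"""Illustrative sketch only (not the authors' implementation) of the idea in
the abstract above: score how "multi" a summary is by the number of source
documents needed to cover its content. Assumptions for illustration:
lowercased unigram recall stands in for content alignment, documents are
chosen greedily, and 0.8 is an arbitrary coverage threshold."""
from collections import Counter
import re


def _tokens(text: str) -> Counter:
    # Lowercased word unigrams; a crude stand-in for proper content alignment.
    return Counter(re.findall(r"[a-z0-9]+", text.lower()))


def coverage(summary_counts: Counter, selected_counts: Counter) -> float:
    # Fraction of summary tokens (with multiplicity) also found in the selected documents.
    total = sum(summary_counts.values())
    covered = sum(min(c, selected_counts[t]) for t, c in summary_counts.items())
    return covered / total if total else 1.0


def documents_needed(summary: str, documents: list[str], threshold: float = 0.8) -> int:
    """Greedily add the document that most increases coverage of the summary
    and return how many documents were needed to reach `threshold` coverage."""
    summary_counts = _tokens(summary)
    doc_counts = [_tokens(d) for d in documents]
    selected: Counter = Counter()
    used = 0
    while coverage(summary_counts, selected) < threshold and used < len(documents):
        best = max(range(len(doc_counts)),
                   key=lambda i: coverage(summary_counts, selected + doc_counts[i]))
        selected += doc_counts.pop(best)
        used += 1
    return used


if __name__ == "__main__":
    docs = [
        "The storm hit the coast on Monday causing flooding.",
        "Officials reported that schools were closed after the flooding.",
        "A new bridge opened in the capital last year.",
    ]
    summary = "A storm caused flooding on Monday and schools were closed."
    print(documents_needed(summary, docs))  # prints 2: the summary draws on two documents
```

Under a sketch like this, a dataset whose reference summaries are typically covered by a single document would score low on "multi-ness", which is the kind of dataset- and system-level analysis the abstract advocates.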