@inproceedings{otmakhova-etal-2022-m3,
title = "{M}3: Multi-level dataset for Multi-document summarisation of Medical studies",
author = "Otmakhova, Yulia and
Verspoor, Karin and
Baldwin, Timothy and
Jimeno Yepes, Antonio and
Lau, Jey Han",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-emnlp.286",
doi = "10.18653/v1/2022.findings-emnlp.286",
pages = "3887--3901",
abstract = "We present M3 (Multi-level dataset for Multi-document summarisation of Medical studies), a benchmark dataset for evaluating the quality of summarisation systems in the biomedical domain. The dataset contains sets of multiple input documents and target summaries of three levels of complexity: documents, sentences, and propositions. The dataset also includes several levels of annotation, including biomedical entities, direction, and strength of relations between them, and the discourse relationships between the input documents ({``}contradiction{''} or {``}agreement{''}). We showcase usage scenarios of the dataset by testing 10 generic and domain-specific summarisation models in a zero-shot setting, and introduce a probing task based on counterfactuals to test if models are aware of the direction and strength of the conclusions generated from input studies.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="otmakhova-etal-2022-m3">
<titleInfo>
<title>M3: Multi-level dataset for Multi-document summarisation of Medical studies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yulia</namePart>
<namePart type="family">Otmakhova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Karin</namePart>
<namePart type="family">Verspoor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Timothy</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonio</namePart>
<namePart type="family">Jimeno Yepes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jey</namePart>
<namePart type="given">Han</namePart>
<namePart type="family">Lau</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present M3 (Multi-level dataset for Multi-document summarisation of Medical studies), a benchmark dataset for evaluating the quality of summarisation systems in the biomedical domain. The dataset contains sets of multiple input documents and target summaries of three levels of complexity: documents, sentences, and propositions. The dataset also includes several levels of annotation, including biomedical entities, direction, and strength of relations between them, and the discourse relationships between the input documents (“contradiction” or “agreement”). We showcase usage scenarios of the dataset by testing 10 generic and domain-specific summarisation models in a zero-shot setting, and introduce a probing task based on counterfactuals to test if models are aware of the direction and strength of the conclusions generated from input studies.</abstract>
<identifier type="citekey">otmakhova-etal-2022-m3</identifier>
<identifier type="doi">10.18653/v1/2022.findings-emnlp.286</identifier>
<location>
<url>https://aclanthology.org/2022.findings-emnlp.286</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>3887</start>
<end>3901</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T M3: Multi-level dataset for Multi-document summarisation of Medical studies
%A Otmakhova, Yulia
%A Verspoor, Karin
%A Baldwin, Timothy
%A Jimeno Yepes, Antonio
%A Lau, Jey Han
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Findings of the Association for Computational Linguistics: EMNLP 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F otmakhova-etal-2022-m3
%X We present M3 (Multi-level dataset for Multi-document summarisation of Medical studies), a benchmark dataset for evaluating the quality of summarisation systems in the biomedical domain. The dataset contains sets of multiple input documents and target summaries of three levels of complexity: documents, sentences, and propositions. The dataset also includes several levels of annotation, including biomedical entities, direction, and strength of relations between them, and the discourse relationships between the input documents (“contradiction” or “agreement”). We showcase usage scenarios of the dataset by testing 10 generic and domain-specific summarisation models in a zero-shot setting, and introduce a probing task based on counterfactuals to test if models are aware of the direction and strength of the conclusions generated from input studies.
%R 10.18653/v1/2022.findings-emnlp.286
%U https://aclanthology.org/2022.findings-emnlp.286
%U https://doi.org/10.18653/v1/2022.findings-emnlp.286
%P 3887-3901
Markdown (Informal)
[M3: Multi-level dataset for Multi-document summarisation of Medical studies](https://aclanthology.org/2022.findings-emnlp.286) (Otmakhova et al., Findings 2022)
ACL