@inproceedings{perez-beltrachini-lapata-2021-models,
title = "Models and Datasets for Cross-Lingual Summarisation",
author = "Perez-Beltrachini, Laura and
Lapata, Mirella",
editor = "Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau",
booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2021",
address = "Online and Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.emnlp-main.742",
doi = "10.18653/v1/2021.emnlp-main.742",
pages = "9408--9423",
abstract = "We present a cross-lingual summarisation corpus with long documents in a source language associated with multi-sentence summaries in a target language. The corpus covers twelve language pairs and directions for four European languages, namely Czech, English, French and German, and the methodology for its creation can be applied to several other languages. We derive cross-lingual document-summary instances from Wikipedia by combining lead paragraphs and articles{'} bodies from language aligned Wikipedia titles. We analyse the proposed cross-lingual summarisation task with automatic metrics and validate it with a human study. To illustrate the utility of our dataset we report experiments with multi-lingual pre-trained models in supervised, zero- and few-shot, and out-of-domain scenarios.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="perez-beltrachini-lapata-2021-models">
<titleInfo>
<title>Models and Datasets for Cross-Lingual Summarisation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="family">Perez-Beltrachini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mirella</namePart>
<namePart type="family">Lapata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marie-Francine</namePart>
<namePart type="family">Moens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuanjing</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Specia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Scott</namePart>
<namePart type="given">Wen-tau</namePart>
<namePart type="family">Yih</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online and Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a cross-lingual summarisation corpus with long documents in a source language associated with multi-sentence summaries in a target language. The corpus covers twelve language pairs and directions for four European languages, namely Czech, English, French and German, and the methodology for its creation can be applied to several other languages. We derive cross-lingual document-summary instances from Wikipedia by combining lead paragraphs and articles’ bodies from language aligned Wikipedia titles. We analyse the proposed cross-lingual summarisation task with automatic metrics and validate it with a human study. To illustrate the utility of our dataset we report experiments with multi-lingual pre-trained models in supervised, zero- and few-shot, and out-of-domain scenarios.</abstract>
<identifier type="citekey">perez-beltrachini-lapata-2021-models</identifier>
<identifier type="doi">10.18653/v1/2021.emnlp-main.742</identifier>
<location>
<url>https://aclanthology.org/2021.emnlp-main.742</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>9408</start>
<end>9423</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Models and Datasets for Cross-Lingual Summarisation
%A Perez-Beltrachini, Laura
%A Lapata, Mirella
%Y Moens, Marie-Francine
%Y Huang, Xuanjing
%Y Specia, Lucia
%Y Yih, Scott Wen-tau
%S Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online and Punta Cana, Dominican Republic
%F perez-beltrachini-lapata-2021-models
%X We present a cross-lingual summarisation corpus with long documents in a source language associated with multi-sentence summaries in a target language. The corpus covers twelve language pairs and directions for four European languages, namely Czech, English, French and German, and the methodology for its creation can be applied to several other languages. We derive cross-lingual document-summary instances from Wikipedia by combining lead paragraphs and articles’ bodies from language aligned Wikipedia titles. We analyse the proposed cross-lingual summarisation task with automatic metrics and validate it with a human study. To illustrate the utility of our dataset we report experiments with multi-lingual pre-trained models in supervised, zero- and few-shot, and out-of-domain scenarios.
%R 10.18653/v1/2021.emnlp-main.742
%U https://aclanthology.org/2021.emnlp-main.742
%U https://doi.org/10.18653/v1/2021.emnlp-main.742
%P 9408-9423
Markdown (Informal)
[Models and Datasets for Cross-Lingual Summarisation](https://aclanthology.org/2021.emnlp-main.742) (Perez-Beltrachini & Lapata, EMNLP 2021)
ACL
- Laura Perez-Beltrachini and Mirella Lapata. 2021. Models and Datasets for Cross-Lingual Summarisation. In Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pages 9408–9423, Online and Punta Cana, Dominican Republic. Association for Computational Linguistics.