% Workshop paper presenting a German document-level text simplification data set.
% "German" in the title is brace-protected as a whole word so that sentence-casing
% bibliography styles keep its capital letter without splitting the word.
@inproceedings{rios-etal-2021-new,
  title     = {A New Dataset and Efficient Baselines for Document-level Text Simplification in {German}},
  author    = {Rios, Annette and
               Spring, Nicolas and
               Kew, Tannon and
               Kostrzewa, Marek and
               S{\"a}uberli, Andreas and
               M{\"u}ller, Mathias and
               Ebling, Sarah},
  editor    = {Carenini, Giuseppe and
               Cheung, Jackie Chi Kit and
               Dong, Yue and
               Liu, Fei and
               Wang, Lu},
  booktitle = {Proceedings of the Third Workshop on New Frontiers in Summarization},
  month     = nov,
  year      = {2021},
  address   = {Online and in Dominican Republic},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.newsum-1.16},
  doi       = {10.18653/v1/2021.newsum-1.16},
  pages     = {152--161},
  abstract  = {The task of document-level text simplification is very similar to summarization with the additional difficulty of reducing complexity. We introduce a newly collected data set of German texts, collected from the Swiss news magazine 20 Minuten ({`}20 Minutes{'}) that consists of full articles paired with simplified summaries. Furthermore, we present experiments on automatic text simplification with the pretrained multilingual mBART and a modified version thereof that is more memory-friendly, using both our new data set and existing simplification corpora. Our modifications of mBART let us train at a lower memory cost without much loss in performance, in fact, the smaller mBART even improves over the standard model in a setting with multiple simplification levels.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS collection holding a single record, keyed rios-etal-2021-new. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="rios-etal-2021-new">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>A New Dataset and Efficient Baselines for Document-level Text Simplification in German</title>
    </titleInfo>
    <!-- Authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Annette</namePart>
      <namePart type="family">Rios</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nicolas</namePart>
      <namePart type="family">Spring</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tannon</namePart>
      <namePart type="family">Kew</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marek</namePart>
      <namePart type="family">Kostrzewa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Andreas</namePart>
      <namePart type="family">Säuberli</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mathias</namePart>
      <namePart type="family">Müller</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sarah</namePart>
      <namePart type="family">Ebling</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <!-- Issue date of the paper (year-month). -->
    <originInfo>
      <dateIssued>2021-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Third Workshop on New Frontiers in Summarization</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Giuseppe</namePart>
        <namePart type="family">Carenini</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jackie</namePart>
        <namePart type="given">Chi</namePart>
        <namePart type="given">Kit</namePart>
        <namePart type="family">Cheung</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yue</namePart>
        <namePart type="family">Dong</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Fei</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lu</namePart>
        <namePart type="family">Wang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online and in Dominican Republic</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The task of document-level text simplification is very similar to summarization with the additional difficulty of reducing complexity. We introduce a newly collected data set of German texts, collected from the Swiss news magazine 20 Minuten (‘20 Minutes’) that consists of full articles paired with simplified summaries. Furthermore, we present experiments on automatic text simplification with the pretrained multilingual mBART and a modified version thereof that is more memory-friendly, using both our new data set and existing simplification corpora. Our modifications of mBART let us train at a lower memory cost without much loss in performance, in fact, the smaller mBART even improves over the standard model in a setting with multiple simplification levels.</abstract>
    <!-- Identifiers and the landing-page URL. -->
    <identifier type="citekey">rios-etal-2021-new</identifier>
    <identifier type="doi">10.18653/v1/2021.newsum-1.16</identifier>
    <location>
      <url>https://aclanthology.org/2021.newsum-1.16</url>
    </location>
    <!-- Date and page range within the host volume. -->
    <part>
      <date>2021-11</date>
      <extent unit="page">
        <start>152</start>
        <end>161</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A New Dataset and Efficient Baselines for Document-level Text Simplification in German
%A Rios, Annette
%A Spring, Nicolas
%A Kew, Tannon
%A Kostrzewa, Marek
%A Säuberli, Andreas
%A Müller, Mathias
%A Ebling, Sarah
%Y Carenini, Giuseppe
%Y Cheung, Jackie Chi Kit
%Y Dong, Yue
%Y Liu, Fei
%Y Wang, Lu
%S Proceedings of the Third Workshop on New Frontiers in Summarization
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online and in Dominican Republic
%F rios-etal-2021-new
%X The task of document-level text simplification is very similar to summarization with the additional difficulty of reducing complexity. We introduce a newly collected data set of German texts, collected from the Swiss news magazine 20 Minuten (‘20 Minutes’) that consists of full articles paired with simplified summaries. Furthermore, we present experiments on automatic text simplification with the pretrained multilingual mBART and a modified version thereof that is more memory-friendly, using both our new data set and existing simplification corpora. Our modifications of mBART let us train at a lower memory cost without much loss in performance, in fact, the smaller mBART even improves over the standard model in a setting with multiple simplification levels.
%R 10.18653/v1/2021.newsum-1.16
%U https://aclanthology.org/2021.newsum-1.16
%U https://doi.org/10.18653/v1/2021.newsum-1.16
%P 152-161
Markdown (Informal)
[A New Dataset and Efficient Baselines for Document-level Text Simplification in German](https://aclanthology.org/2021.newsum-1.16) (Rios et al., NewSum 2021)
ACL