@inproceedings{palmero-aprosio-etal-2019-neural,
title = "Neural Text Simplification in Low-Resource Conditions Using Weak Supervision",
author = "Palmero Aprosio, Alessio and
Tonelli, Sara and
Turchi, Marco and
Negri, Matteo and
Di Gangi, Mattia A.",
editor = "Bosselut, Antoine and
Celikyilmaz, Asli and
Ghazvininejad, Marjan and
Iyer, Srinivasan and
Khandelwal, Urvashi and
Rashkin, Hannah and
Wolf, Thomas",
booktitle = "Proceedings of the Workshop on Methods for Optimizing and Evaluating Neural Language Generation",
month = jun,
year = "2019",
address = "Minneapolis, Minnesota",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-2305",
doi = "10.18653/v1/W19-2305",
pages = "37--44",
abstract = "Neural text simplification has gained increasing attention in the NLP community thanks to recent advancements in deep sequence-to-sequence learning. Most recent efforts with such a data-demanding paradigm have dealt with the English language, for which sizeable training datasets are currently available to deploy competitive models. Similar improvements on less resource-rich languages are conditioned either to intensive manual work to create training data, or to the design of effective automatic generation techniques to bypass the data acquisition bottleneck. Inspired by the machine translation field, in which synthetic parallel pairs generated from monolingual data yield significant improvements to neural models, in this paper we exploit large amounts of heterogeneous data to automatically select simple sentences, which are then used to create synthetic simplification pairs. We also evaluate other solutions, such as oversampling and the use of external word embeddings to be fed to the neural simplification system. Our approach is evaluated on Italian and Spanish, for which few thousand gold sentence pairs are available. The results show that these techniques yield performance improvements over a baseline sequence-to-sequence configuration.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="palmero-aprosio-etal-2019-neural">
<titleInfo>
<title>Neural Text Simplification in Low-Resource Conditions Using Weak Supervision</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alessio</namePart>
<namePart type="family">Palmero Aprosio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Tonelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Turchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matteo</namePart>
<namePart type="family">Negri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mattia</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Di Gangi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Methods for Optimizing and Evaluating Neural Language Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Antoine</namePart>
<namePart type="family">Bosselut</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asli</namePart>
<namePart type="family">Celikyilmaz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marjan</namePart>
<namePart type="family">Ghazvininejad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Srinivasan</namePart>
<namePart type="family">Iyer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Urvashi</namePart>
<namePart type="family">Khandelwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hannah</namePart>
<namePart type="family">Rashkin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Wolf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Minneapolis, Minnesota</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Neural text simplification has gained increasing attention in the NLP community thanks to recent advancements in deep sequence-to-sequence learning. Most recent efforts with such a data-demanding paradigm have dealt with the English language, for which sizeable training datasets are currently available to deploy competitive models. Similar improvements for less resource-rich languages depend either on intensive manual work to create training data, or on the design of effective automatic generation techniques to bypass the data acquisition bottleneck. Inspired by the machine translation field, in which synthetic parallel pairs generated from monolingual data yield significant improvements to neural models, in this paper we exploit large amounts of heterogeneous data to automatically select simple sentences, which are then used to create synthetic simplification pairs. We also evaluate other solutions, such as oversampling and the use of external word embeddings fed to the neural simplification system. Our approach is evaluated on Italian and Spanish, for which a few thousand gold sentence pairs are available. The results show that these techniques yield performance improvements over a baseline sequence-to-sequence configuration.</abstract>
<identifier type="citekey">palmero-aprosio-etal-2019-neural</identifier>
<identifier type="doi">10.18653/v1/W19-2305</identifier>
<location>
<url>https://aclanthology.org/W19-2305</url>
</location>
<part>
<date>2019-06</date>
<extent unit="page">
<start>37</start>
<end>44</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Neural Text Simplification in Low-Resource Conditions Using Weak Supervision
%A Palmero Aprosio, Alessio
%A Tonelli, Sara
%A Turchi, Marco
%A Negri, Matteo
%A Di Gangi, Mattia A.
%Y Bosselut, Antoine
%Y Celikyilmaz, Asli
%Y Ghazvininejad, Marjan
%Y Iyer, Srinivasan
%Y Khandelwal, Urvashi
%Y Rashkin, Hannah
%Y Wolf, Thomas
%S Proceedings of the Workshop on Methods for Optimizing and Evaluating Neural Language Generation
%D 2019
%8 June
%I Association for Computational Linguistics
%C Minneapolis, Minnesota
%F palmero-aprosio-etal-2019-neural
%X Neural text simplification has gained increasing attention in the NLP community thanks to recent advancements in deep sequence-to-sequence learning. Most recent efforts with such a data-demanding paradigm have dealt with the English language, for which sizeable training datasets are currently available to deploy competitive models. Similar improvements for less resource-rich languages depend either on intensive manual work to create training data, or on the design of effective automatic generation techniques to bypass the data acquisition bottleneck. Inspired by the machine translation field, in which synthetic parallel pairs generated from monolingual data yield significant improvements to neural models, in this paper we exploit large amounts of heterogeneous data to automatically select simple sentences, which are then used to create synthetic simplification pairs. We also evaluate other solutions, such as oversampling and the use of external word embeddings fed to the neural simplification system. Our approach is evaluated on Italian and Spanish, for which a few thousand gold sentence pairs are available. The results show that these techniques yield performance improvements over a baseline sequence-to-sequence configuration.
%R 10.18653/v1/W19-2305
%U https://aclanthology.org/W19-2305
%U https://doi.org/10.18653/v1/W19-2305
%P 37-44
Markdown (Informal)
[Neural Text Simplification in Low-Resource Conditions Using Weak Supervision](https://aclanthology.org/W19-2305) (Palmero Aprosio et al., NAACL 2019)
ACL
Alessio Palmero Aprosio, Sara Tonelli, Marco Turchi, Matteo Negri, and Mattia A. Di Gangi. 2019. Neural Text Simplification in Low-Resource Conditions Using Weak Supervision. In Proceedings of the Workshop on Methods for Optimizing and Evaluating Neural Language Generation, pages 37–44, Minneapolis, Minnesota. Association for Computational Linguistics.