@inproceedings{montella-etal-2020-denoising,
title = "Denoising Pre-Training and Data Augmentation Strategies for Enhanced {RDF} Verbalization with Transformers",
author = "Montella, Sebastien and
Fabre, Betty and
Urvoy, Tanguy and
Heinecke, Johannes and
Rojas-Barahona, Lina",
editor = "Castro Ferreira, Thiago and
Gardent, Claire and
Ilinykh, Nikolai and
van der Lee, Chris and
Mille, Simon and
Moussallem, Diego and
Shimorina, Anastasia",
booktitle = "Proceedings of the 3rd International Workshop on Natural Language Generation from the Semantic Web (WebNLG+)",
month = "12",
year = "2020",
address = "Dublin, Ireland (Virtual)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.webnlg-1.9",
pages = "89--99",
abstract = "The task of verbalization of RDF triples has known a growth in popularity due to the rising ubiquity of Knowledge Bases (KBs). The formalism of RDF triples is a simple and efficient way to store facts at a large scale. However, its abstract representation makes it difficult for humans to interpret. For this purpose, the WebNLG challenge aims at promoting automated RDF-to-text generation. We propose to leverage pre-trainings from augmented data with the Transformer model using a data augmentation strategy. Our experiment results show minimum relative increases of 3.73{\%}, 126.05{\%} and 88.16{\%} in BLEU score for seen categories, unseen entities and unseen categories respectively over the standard training.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="montella-etal-2020-denoising">
<titleInfo>
<title>Denoising Pre-Training and Data Augmentation Strategies for Enhanced RDF Verbalization with Transformers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sebastien</namePart>
<namePart type="family">Montella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Betty</namePart>
<namePart type="family">Fabre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanguy</namePart>
<namePart type="family">Urvoy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johannes</namePart>
<namePart type="family">Heinecke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lina</namePart>
<namePart type="family">Rojas-Barahona</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd International Workshop on Natural Language Generation from the Semantic Web (WebNLG+)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Thiago</namePart>
<namePart type="family">Castro Ferreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claire</namePart>
<namePart type="family">Gardent</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikolai</namePart>
<namePart type="family">Ilinykh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">van der Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Mille</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diego</namePart>
<namePart type="family">Moussallem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anastasia</namePart>
<namePart type="family">Shimorina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland (Virtual)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The task of verbalization of RDF triples has known a growth in popularity due to the rising ubiquity of Knowledge Bases (KBs). The formalism of RDF triples is a simple and efficient way to store facts at a large scale. However, its abstract representation makes it difficult for humans to interpret. For this purpose, the WebNLG challenge aims at promoting automated RDF-to-text generation. We propose to leverage pre-trainings from augmented data with the Transformer model using a data augmentation strategy. Our experiment results show minimum relative increases of 3.73%, 126.05% and 88.16% in BLEU score for seen categories, unseen entities and unseen categories respectively over the standard training.</abstract>
<identifier type="citekey">montella-etal-2020-denoising</identifier>
<location>
<url>https://aclanthology.org/2020.webnlg-1.9</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>89</start>
<end>99</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Denoising Pre-Training and Data Augmentation Strategies for Enhanced RDF Verbalization with Transformers
%A Montella, Sebastien
%A Fabre, Betty
%A Urvoy, Tanguy
%A Heinecke, Johannes
%A Rojas-Barahona, Lina
%Y Castro Ferreira, Thiago
%Y Gardent, Claire
%Y Ilinykh, Nikolai
%Y van der Lee, Chris
%Y Mille, Simon
%Y Moussallem, Diego
%Y Shimorina, Anastasia
%S Proceedings of the 3rd International Workshop on Natural Language Generation from the Semantic Web (WebNLG+)
%D 2020
%8 December
%I Association for Computational Linguistics
%C Dublin, Ireland (Virtual)
%F montella-etal-2020-denoising
%X The task of verbalization of RDF triples has known a growth in popularity due to the rising ubiquity of Knowledge Bases (KBs). The formalism of RDF triples is a simple and efficient way to store facts at a large scale. However, its abstract representation makes it difficult for humans to interpret. For this purpose, the WebNLG challenge aims at promoting automated RDF-to-text generation. We propose to leverage pre-trainings from augmented data with the Transformer model using a data augmentation strategy. Our experiment results show minimum relative increases of 3.73%, 126.05% and 88.16% in BLEU score for seen categories, unseen entities and unseen categories respectively over the standard training.
%U https://aclanthology.org/2020.webnlg-1.9
%P 89-99
Markdown (Informal)
[Denoising Pre-Training and Data Augmentation Strategies for Enhanced RDF Verbalization with Transformers](https://aclanthology.org/2020.webnlg-1.9) (Montella et al., WebNLG 2020)
ACL