@inproceedings{castro-ferreira-etal-2021-enriching,
title = "Enriching the {E}2{E} dataset",
author = "Castro Ferreira, Thiago and
Vaz, Helena and
Davis, Brian and
Pagano, Adriana",
editor = "Belz, Anya and
Fan, Angela and
Reiter, Ehud and
Sripada, Yaji",
booktitle = "Proceedings of the 14th International Conference on Natural Language Generation",
month = aug,
year = "2021",
address = "Aberdeen, Scotland, UK",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.inlg-1.18/",
doi = "10.18653/v1/2021.inlg-1.18",
pages = "177--183",
abstract = "This study introduces an enriched version of the E2E dataset, one of the most popular language resources for data-to-text NLG. We extract intermediate representations for popular pipeline tasks such as discourse ordering, text structuring, lexicalization and referring expression generation, enabling researchers to rapidly develop and evaluate their data-to-text pipeline systems. The intermediate representations are extracted by aligning non-linguistic and text representations through a process called delexicalization, which consists in replacing input referring expressions to entities/attributes with placeholders. The enriched dataset is publicly available."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="castro-ferreira-etal-2021-enriching">
<titleInfo>
<title>Enriching the E2E dataset</title>
</titleInfo>
<name type="personal">
<namePart type="given">Thiago</namePart>
<namePart type="family">Castro Ferreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Vaz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brian</namePart>
<namePart type="family">Davis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adriana</namePart>
<namePart type="family">Pagano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 14th International Conference on Natural Language Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anya</namePart>
<namePart type="family">Belz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Angela</namePart>
<namePart type="family">Fan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ehud</namePart>
<namePart type="family">Reiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yaji</namePart>
<namePart type="family">Sripada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Aberdeen, Scotland, UK</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This study introduces an enriched version of the E2E dataset, one of the most popular language resources for data-to-text NLG. We extract intermediate representations for popular pipeline tasks such as discourse ordering, text structuring, lexicalization and referring expression generation, enabling researchers to rapidly develop and evaluate their data-to-text pipeline systems. The intermediate representations are extracted by aligning non-linguistic and text representations through a process called delexicalization, which consists in replacing input referring expressions to entities/attributes with placeholders. The enriched dataset is publicly available.</abstract>
<identifier type="citekey">castro-ferreira-etal-2021-enriching</identifier>
<identifier type="doi">10.18653/v1/2021.inlg-1.18</identifier>
<location>
<url>https://aclanthology.org/2021.inlg-1.18/</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>177</start>
<end>183</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Enriching the E2E dataset
%A Castro Ferreira, Thiago
%A Vaz, Helena
%A Davis, Brian
%A Pagano, Adriana
%Y Belz, Anya
%Y Fan, Angela
%Y Reiter, Ehud
%Y Sripada, Yaji
%S Proceedings of the 14th International Conference on Natural Language Generation
%D 2021
%8 August
%I Association for Computational Linguistics
%C Aberdeen, Scotland, UK
%F castro-ferreira-etal-2021-enriching
%X This study introduces an enriched version of the E2E dataset, one of the most popular language resources for data-to-text NLG. We extract intermediate representations for popular pipeline tasks such as discourse ordering, text structuring, lexicalization and referring expression generation, enabling researchers to rapidly develop and evaluate their data-to-text pipeline systems. The intermediate representations are extracted by aligning non-linguistic and text representations through a process called delexicalization, which consists in replacing input referring expressions to entities/attributes with placeholders. The enriched dataset is publicly available.
%R 10.18653/v1/2021.inlg-1.18
%U https://aclanthology.org/2021.inlg-1.18/
%U https://doi.org/10.18653/v1/2021.inlg-1.18
%P 177-183
Markdown (Informal)
[Enriching the E2E dataset](https://aclanthology.org/2021.inlg-1.18/) (Castro Ferreira et al., INLG 2021)
ACL
- Thiago Castro Ferreira, Helena Vaz, Brian Davis, and Adriana Pagano. 2021. Enriching the E2E dataset. In Proceedings of the 14th International Conference on Natural Language Generation, pages 177–183, Aberdeen, Scotland, UK. Association for Computational Linguistics.