@inproceedings{van-der-lee-etal-2020-cacapo,
title = "The {CACAPO} Dataset: A Multilingual, Multi-Domain Dataset for Neural Pipeline and End-to-End Data-to-Text Generation",
author = "van der Lee, Chris and
Emmery, Chris and
Wubben, Sander and
Krahmer, Emiel",
editor = "Davis, Brian and
Graham, Yvette and
Kelleher, John and
Sripada, Yaji",
booktitle = "Proceedings of the 13th International Conference on Natural Language Generation",
month = dec,
year = "2020",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.inlg-1.10",
doi = "10.18653/v1/2020.inlg-1.10",
pages = "68--79",
abstract = "This paper describes the CACAPO dataset, built for training both neural pipeline and end-to-end data-to-text language generation systems. The dataset is multilingual (Dutch and English), and contains almost 10,000 sentences from human-written news texts in the sports, weather, stocks, and incidents domain, together with aligned attribute-value paired data. The dataset is unique in that the linguistic variation and indirect ways of expressing data in these texts reflect the challenges of real world NLG tasks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="van-der-lee-etal-2020-cacapo">
<titleInfo>
<title>The CACAPO Dataset: A Multilingual, Multi-Domain Dataset for Neural Pipeline and End-to-End Data-to-Text Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">van der Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Emmery</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sander</namePart>
<namePart type="family">Wubben</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emiel</namePart>
<namePart type="family">Krahmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th International Conference on Natural Language Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Brian</namePart>
<namePart type="family">Davis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yvette</namePart>
<namePart type="family">Graham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Kelleher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yaji</namePart>
<namePart type="family">Sripada</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the CACAPO dataset, built for training both neural pipeline and end-to-end data-to-text language generation systems. The dataset is multilingual (Dutch and English), and contains almost 10,000 sentences from human-written news texts in the sports, weather, stocks, and incidents domain, together with aligned attribute-value paired data. The dataset is unique in that the linguistic variation and indirect ways of expressing data in these texts reflect the challenges of real world NLG tasks.</abstract>
<identifier type="citekey">van-der-lee-etal-2020-cacapo</identifier>
<identifier type="doi">10.18653/v1/2020.inlg-1.10</identifier>
<location>
<url>https://aclanthology.org/2020.inlg-1.10</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>68</start>
<end>79</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The CACAPO Dataset: A Multilingual, Multi-Domain Dataset for Neural Pipeline and End-to-End Data-to-Text Generation
%A van der Lee, Chris
%A Emmery, Chris
%A Wubben, Sander
%A Krahmer, Emiel
%Y Davis, Brian
%Y Graham, Yvette
%Y Kelleher, John
%Y Sripada, Yaji
%S Proceedings of the 13th International Conference on Natural Language Generation
%D 2020
%8 December
%I Association for Computational Linguistics
%C Dublin, Ireland
%F van-der-lee-etal-2020-cacapo
%X This paper describes the CACAPO dataset, built for training both neural pipeline and end-to-end data-to-text language generation systems. The dataset is multilingual (Dutch and English), and contains almost 10,000 sentences from human-written news texts in the sports, weather, stocks, and incidents domain, together with aligned attribute-value paired data. The dataset is unique in that the linguistic variation and indirect ways of expressing data in these texts reflect the challenges of real world NLG tasks.
%R 10.18653/v1/2020.inlg-1.10
%U https://aclanthology.org/2020.inlg-1.10
%U https://doi.org/10.18653/v1/2020.inlg-1.10
%P 68-79
Markdown (Informal)
[The CACAPO Dataset: A Multilingual, Multi-Domain Dataset for Neural Pipeline and End-to-End Data-to-Text Generation](https://aclanthology.org/2020.inlg-1.10) (van der Lee et al., INLG 2020)
ACL