@inproceedings{wang-2019-revisiting,
    title = "Revisiting Challenges in Data-to-Text Generation with Fact Grounding",
    author = "Wang, Hongmin",
    editor = "van Deemter, Kees  and
      Lin, Chenghua  and
      Takamura, Hiroya",
    booktitle = "Proceedings of the 12th International Conference on Natural Language Generation",
    month = oct # "{--}" # nov,
    year = "2019",
    address = "Tokyo, Japan",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/W19-8639",
    doi = "10.18653/v1/W19-8639",
    pages = "311--322",
    abstract = "Data-to-text generation models face challenges in ensuring data fidelity by referring to the correct input source. To inspire studies in this area, Wiseman et al. (2017) introduced the RotoWire corpus on generating NBA game summaries from the box- and line-score tables. However, limited attempts have been made in this direction and the challenges remain. We observe a prominent bottleneck in the corpus where only about 60{\%} of the summary contents can be grounded to the boxscore records. Such information deficiency tends to misguide a conditioned language model to produce unconditioned random facts and thus leads to factual hallucinations. In this work, we restore the information balance and revamp this task to focus on fact-grounded data-to-text generation. We introduce a purified and larger-scale dataset, RotoWire-FG (Fact-Grounding), with 50{\%} more data from the year 2017-19 and enriched input tables, and hope to attract research focuses in this direction. Moreover, we achieve improved data fidelity over the state-of-the-art models by integrating a new form of table reconstruction as an auxiliary task to boost the generation quality.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="wang-2019-revisiting">
    <titleInfo>
      <title>Revisiting Challenges in Data-to-Text Generation with Fact Grounding</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Hongmin</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-oct–nov</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 12th International Conference on Natural Language Generation</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Kees</namePart>
        <namePart type="family">van Deemter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Chenghua</namePart>
        <namePart type="family">Lin</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hiroya</namePart>
        <namePart type="family">Takamura</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Tokyo, Japan</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Data-to-text generation models face challenges in ensuring data fidelity by referring to the correct input source. To inspire studies in this area, Wiseman et al. (2017) introduced the RotoWire corpus on generating NBA game summaries from the box- and line-score tables. However, limited attempts have been made in this direction and the challenges remain. We observe a prominent bottleneck in the corpus where only about 60% of the summary contents can be grounded to the boxscore records. Such information deficiency tends to misguide a conditioned language model to produce unconditioned random facts and thus leads to factual hallucinations. In this work, we restore the information balance and revamp this task to focus on fact-grounded data-to-text generation. We introduce a purified and larger-scale dataset, RotoWire-FG (Fact-Grounding), with 50% more data from the year 2017-19 and enriched input tables, and hope to attract research focuses in this direction. Moreover, we achieve improved data fidelity over the state-of-the-art models by integrating a new form of table reconstruction as an auxiliary task to boost the generation quality.</abstract>
    <identifier type="citekey">wang-2019-revisiting</identifier>
    <identifier type="doi">10.18653/v1/W19-8639</identifier>
    <location>
      <url>https://aclanthology.org/W19-8639</url>
    </location>
    <part>
      <date>2019-oct–nov</date>
      <extent unit="page">
        <start>311</start>
        <end>322</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Revisiting Challenges in Data-to-Text Generation with Fact Grounding
%A Wang, Hongmin
%Y van Deemter, Kees
%Y Lin, Chenghua
%Y Takamura, Hiroya
%S Proceedings of the 12th International Conference on Natural Language Generation
%D 2019
%8 oct–nov
%I Association for Computational Linguistics
%C Tokyo, Japan
%F wang-2019-revisiting
%X Data-to-text generation models face challenges in ensuring data fidelity by referring to the correct input source. To inspire studies in this area, Wiseman et al. (2017) introduced the RotoWire corpus on generating NBA game summaries from the box- and line-score tables. However, limited attempts have been made in this direction and the challenges remain. We observe a prominent bottleneck in the corpus where only about 60% of the summary contents can be grounded to the boxscore records. Such information deficiency tends to misguide a conditioned language model to produce unconditioned random facts and thus leads to factual hallucinations. In this work, we restore the information balance and revamp this task to focus on fact-grounded data-to-text generation. We introduce a purified and larger-scale dataset, RotoWire-FG (Fact-Grounding), with 50% more data from the year 2017-19 and enriched input tables, and hope to attract research focuses in this direction. Moreover, we achieve improved data fidelity over the state-of-the-art models by integrating a new form of table reconstruction as an auxiliary task to boost the generation quality.
%R 10.18653/v1/W19-8639
%U https://aclanthology.org/W19-8639
%U https://doi.org/10.18653/v1/W19-8639
%P 311-322
Markdown (Informal)
[Revisiting Challenges in Data-to-Text Generation with Fact Grounding](https://aclanthology.org/W19-8639) (Wang, INLG 2019)
ACL
Hongmin Wang. 2019. [Revisiting Challenges in Data-to-Text Generation with Fact Grounding](https://aclanthology.org/W19-8639). In *Proceedings of the 12th International Conference on Natural Language Generation*, pages 311–322, Tokyo, Japan. Association for Computational Linguistics.