@inproceedings{thomson-reiter-2022-accuracy,
title = "The Accuracy Evaluation Shared Task as a Retrospective Reproduction Study",
author = "Thomson, Craig and
Reiter, Ehud",
editor = "Shaikh, Samira and
Ferreira, Thiago and
Stent, Amanda",
booktitle = "Proceedings of the 15th International Conference on Natural Language Generation: Generation Challenges",
month = jul,
year = "2022",
address = "Waterville, Maine, USA and virtual meeting",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.inlg-genchal.11",
pages = "71--79",
abstract = "We investigate the data collected for the Accuracy Evaluation Shared Task as a retrospective reproduction study. The shared task was based upon errors found by human annotation of computer generated summaries of basketball games. Annotation was performed in three separate stages, with texts taken from the same three systems and checked for errors by the same three annotators. We show that the mean count of errors was consistent at the highest level for each experiment, with increased variance when looking at per-system and/or per-error- type breakdowns.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="thomson-reiter-2022-accuracy">
<titleInfo>
<title>The Accuracy Evaluation Shared Task as a Retrospective Reproduction Study</title>
</titleInfo>
<name type="personal">
<namePart type="given">Craig</namePart>
<namePart type="family">Thomson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ehud</namePart>
<namePart type="family">Reiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th International Conference on Natural Language Generation: Generation Challenges</title>
</titleInfo>
<name type="personal">
<namePart type="given">Samira</namePart>
<namePart type="family">Shaikh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thiago</namePart>
<namePart type="family">Ferreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amanda</namePart>
<namePart type="family">Stent</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Waterville, Maine, USA and virtual meeting</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We investigate the data collected for the Accuracy Evaluation Shared Task as a retrospective reproduction study. The shared task was based upon errors found by human annotation of computer generated summaries of basketball games. Annotation was performed in three separate stages, with texts taken from the same three systems and checked for errors by the same three annotators. We show that the mean count of errors was consistent at the highest level for each experiment, with increased variance when looking at per-system and/or per-error- type breakdowns.</abstract>
<identifier type="citekey">thomson-reiter-2022-accuracy</identifier>
<location>
<url>https://aclanthology.org/2022.inlg-genchal.11</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>71</start>
<end>79</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Accuracy Evaluation Shared Task as a Retrospective Reproduction Study
%A Thomson, Craig
%A Reiter, Ehud
%Y Shaikh, Samira
%Y Ferreira, Thiago
%Y Stent, Amanda
%S Proceedings of the 15th International Conference on Natural Language Generation: Generation Challenges
%D 2022
%8 July
%I Association for Computational Linguistics
%C Waterville, Maine, USA and virtual meeting
%F thomson-reiter-2022-accuracy
%X We investigate the data collected for the Accuracy Evaluation Shared Task as a retrospective reproduction study. The shared task was based upon errors found by human annotation of computer generated summaries of basketball games. Annotation was performed in three separate stages, with texts taken from the same three systems and checked for errors by the same three annotators. We show that the mean count of errors was consistent at the highest level for each experiment, with increased variance when looking at per-system and/or per-error- type breakdowns.
%U https://aclanthology.org/2022.inlg-genchal.11
%P 71-79
Markdown (Informal)
[The Accuracy Evaluation Shared Task as a Retrospective Reproduction Study](https://aclanthology.org/2022.inlg-genchal.11) (Thomson & Reiter, INLG 2022)
ACL