@inproceedings{choshen-abend-2018-inherent,
title = "Inherent Biases in Reference-based Evaluation for Grammatical Error Correction",
author = "Choshen, Leshem and
Abend, Omri",
editor = "Gurevych, Iryna and
Miyao, Yusuke",
booktitle = "Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2018",
address = "Melbourne, Australia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P18-1059",
doi = "10.18653/v1/P18-1059",
pages = "632--642",
abstract = "The prevalent use of too few references for evaluating text-to-text generation is known to bias estimates of their quality (henceforth, low coverage bias or LCB). This paper shows that overcoming LCB in Grammatical Error Correction (GEC) evaluation cannot be attained by re-scaling or by increasing the number of references in any feasible range, contrary to previous suggestions. This is due to the long-tailed distribution of valid corrections for a sentence. Concretely, we show that LCB incentivizes GEC systems to avoid correcting even when they can generate a valid correction. Consequently, existing systems obtain comparable or superior performance compared to humans, by making few but targeted changes to the input. Similar effects on Text Simplification further support our claims.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="choshen-abend-2018-inherent">
<titleInfo>
<title>Inherent Biases in Reference-based Evaluation for Grammatical Error Correction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Leshem</namePart>
<namePart type="family">Choshen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Omri</namePart>
<namePart type="family">Abend</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yusuke</namePart>
<namePart type="family">Miyao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Melbourne, Australia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The prevalent use of too few references for evaluating text-to-text generation is known to bias estimates of their quality (henceforth, low coverage bias or LCB). This paper shows that overcoming LCB in Grammatical Error Correction (GEC) evaluation cannot be attained by re-scaling or by increasing the number of references in any feasible range, contrary to previous suggestions. This is due to the long-tailed distribution of valid corrections for a sentence. Concretely, we show that LCB incentivizes GEC systems to avoid correcting even when they can generate a valid correction. Consequently, existing systems obtain comparable or superior performance compared to humans, by making few but targeted changes to the input. Similar effects on Text Simplification further support our claims.</abstract>
<identifier type="citekey">choshen-abend-2018-inherent</identifier>
<identifier type="doi">10.18653/v1/P18-1059</identifier>
<location>
<url>https://aclanthology.org/P18-1059</url>
</location>
<part>
<date>2018-07</date>
<extent unit="page">
<start>632</start>
<end>642</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Inherent Biases in Reference-based Evaluation for Grammatical Error Correction
%A Choshen, Leshem
%A Abend, Omri
%Y Gurevych, Iryna
%Y Miyao, Yusuke
%S Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2018
%8 July
%I Association for Computational Linguistics
%C Melbourne, Australia
%F choshen-abend-2018-inherent
%X The prevalent use of too few references for evaluating text-to-text generation is known to bias estimates of their quality (henceforth, low coverage bias or LCB). This paper shows that overcoming LCB in Grammatical Error Correction (GEC) evaluation cannot be attained by re-scaling or by increasing the number of references in any feasible range, contrary to previous suggestions. This is due to the long-tailed distribution of valid corrections for a sentence. Concretely, we show that LCB incentivizes GEC systems to avoid correcting even when they can generate a valid correction. Consequently, existing systems obtain comparable or superior performance compared to humans, by making few but targeted changes to the input. Similar effects on Text Simplification further support our claims.
%R 10.18653/v1/P18-1059
%U https://aclanthology.org/P18-1059
%U https://doi.org/10.18653/v1/P18-1059
%P 632-642
Markdown (Informal)
[Inherent Biases in Reference-based Evaluation for Grammatical Error Correction](https://aclanthology.org/P18-1059) (Choshen & Abend, ACL 2018)
ACL