@inproceedings{goto-etal-2025-improving,
title = "Improving Explainability of Sentence-level Metrics via Edit-level Attribution for Grammatical Error Correction",
author = "Goto, Takumi and
Vasselli, Justin and
Watanabe, Taro",
editor = "Zhao, Jin and
Wang, Mingyang and
Liu, Zhu",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-srw.77/",
doi = "10.18653/v1/2025.acl-srw.77",
pages = "1004--1015",
ISBN = "979-8-89176-254-1",
abstract = "Various evaluation metrics have been proposed for Grammatical Error Correction (GEC), but many, particularly reference-free metrics, lack explainability. This lack of explainability hinders researchers from analyzing the strengths and weaknesses of GEC models and limits the ability to provide detailed feedback for users. To address this issue, we propose attributing sentence-level scores to individual edits, providing insight into how specific corrections contribute to the overall performance. For the attribution method, we use Shapley values, from cooperative game theory, to compute the contribution of each edit. Experiments with existing sentence-level metrics demonstrate high consistency across different edit granularities and show approximately 70{\%} alignment with human evaluations. In addition, we analyze biases in the metrics based on the attribution results, revealing trends such as the tendency to ignore orthographic edits."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="goto-etal-2025-improving">
    <titleInfo>
      <title>Improving Explainability of Sentence-level Metrics via Edit-level Attribution for Grammatical Error Correction</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Takumi</namePart>
      <namePart type="family">Goto</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Justin</namePart>
      <namePart type="family">Vasselli</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Taro</namePart>
      <namePart type="family">Watanabe</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Jin</namePart>
        <namePart type="family">Zhao</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mingyang</namePart>
        <namePart type="family">Wang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Zhu</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-254-1</identifier>
    </relatedItem>
    <abstract>Various evaluation metrics have been proposed for Grammatical Error Correction (GEC), but many, particularly reference-free metrics, lack explainability. This lack of explainability hinders researchers from analyzing the strengths and weaknesses of GEC models and limits the ability to provide detailed feedback for users. To address this issue, we propose attributing sentence-level scores to individual edits, providing insight into how specific corrections contribute to the overall performance. For the attribution method, we use Shapley values, from cooperative game theory, to compute the contribution of each edit. Experiments with existing sentence-level metrics demonstrate high consistency across different edit granularities and show approximately 70% alignment with human evaluations. In addition, we analyze biases in the metrics based on the attribution results, revealing trends such as the tendency to ignore orthographic edits.</abstract>
    <identifier type="citekey">goto-etal-2025-improving</identifier>
    <identifier type="doi">10.18653/v1/2025.acl-srw.77</identifier>
    <location>
      <url>https://aclanthology.org/2025.acl-srw.77/</url>
    </location>
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>1004</start>
        <end>1015</end>
      </extent>
    </part>
  </mods>
</modsCollection>

%0 Conference Proceedings
%T Improving Explainability of Sentence-level Metrics via Edit-level Attribution for Grammatical Error Correction
%A Goto, Takumi
%A Vasselli, Justin
%A Watanabe, Taro
%Y Zhao, Jin
%Y Wang, Mingyang
%Y Liu, Zhu
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-254-1
%F goto-etal-2025-improving
%X Various evaluation metrics have been proposed for Grammatical Error Correction (GEC), but many, particularly reference-free metrics, lack explainability. This lack of explainability hinders researchers from analyzing the strengths and weaknesses of GEC models and limits the ability to provide detailed feedback for users. To address this issue, we propose attributing sentence-level scores to individual edits, providing insight into how specific corrections contribute to the overall performance. For the attribution method, we use Shapley values, from cooperative game theory, to compute the contribution of each edit. Experiments with existing sentence-level metrics demonstrate high consistency across different edit granularities and show approximately 70% alignment with human evaluations. In addition, we analyze biases in the metrics based on the attribution results, revealing trends such as the tendency to ignore orthographic edits.
%R 10.18653/v1/2025.acl-srw.77
%U https://aclanthology.org/2025.acl-srw.77/
%U https://doi.org/10.18653/v1/2025.acl-srw.77
%P 1004-1015

Markdown (Informal)

[Improving Explainability of Sentence-level Metrics via Edit-level Attribution for Grammatical Error Correction](https://aclanthology.org/2025.acl-srw.77/) (Goto et al., ACL 2025)

ACL

Takumi Goto, Justin Vasselli, and Taro Watanabe. 2025. Improving Explainability of Sentence-level Metrics via Edit-level Attribution for Grammatical Error Correction. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop), pages 1004–1015, Vienna, Austria. Association for Computational Linguistics.
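
The abstract describes attributing a sentence-level metric score to individual edits via Shapley values from cooperative game theory. The paper's own implementation is not part of this record; the following is a minimal Python sketch of the underlying computation, assuming a hypothetical `score` callable (any sentence-level metric over tokens) and a simple `(start, end, replacement)` span representation for edits. Both are illustrative assumptions, not details taken from the paper.

```python
from itertools import combinations
from math import factorial
from typing import Callable, List, Tuple

# Hypothetical edit representation: replace source tokens [start, end)
# with `replacement`. The paper's actual edit format may differ.
Edit = Tuple[int, int, List[str]]

def apply_edits(src: List[str], edits: List[Edit]) -> List[str]:
    """Apply non-overlapping span edits, right to left so that earlier
    offsets remain valid after each replacement."""
    out = list(src)
    for start, end, replacement in sorted(edits, key=lambda e: e[0], reverse=True):
        out[start:end] = replacement
    return out

def shapley_attribution(
    src: List[str],
    edits: List[Edit],
    score: Callable[[List[str]], float],
) -> List[float]:
    """Exact Shapley value of each edit under the sentence-level metric
    `score`, enumerating all 2^n edit subsets (tractable for small n)."""
    n = len(edits)
    phi = [0.0] * n
    for i in range(n):
        others = [j for j in range(n) if j != i]
        for k in range(n):
            # Shapley weight for coalitions of size k that exclude edit i.
            weight = factorial(k) * factorial(n - k - 1) / factorial(n)
            for subset in combinations(others, k):
                chosen = [edits[j] for j in subset]
                without_i = score(apply_edits(src, chosen))
                with_i = score(apply_edits(src, chosen + [edits[i]]))
                phi[i] += weight * (with_i - without_i)
    return phi

# Toy usage with a stand-in metric that rewards an exact reference match.
src = "He go to school".split()
ref = "He goes to school".split()
edits: List[Edit] = [(1, 2, ["goes"])]
print(shapley_attribution(src, edits, lambda hyp: float(hyp == ref)))  # [1.0]
```

Exact enumeration costs O(2^n) metric calls per sentence, which is only practical for the handful of edits typical of a single sentence; for larger edit sets, Monte Carlo sampling over permutations is the standard approximation for Shapley values.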