@inproceedings{von-daniken-etal-2025-measure,
title = "A Measure of the System Dependence of Automated Metrics",
author = {Von D{\"a}niken, Pius and
Deriu, Jan Milan and
Cieliebak, Mark},
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-short.8/",
doi = "10.18653/v1/2025.acl-short.8",
pages = "87--99",
ISBN = "979-8-89176-252-7",
abstract = "Automated metrics for Machine Translation have made significant progress, with the goal of replacing expensive and time-consuming human evaluations. These metrics are typically assessed by their correlation with human judgments, which captures the monotonic relationship between human and metric scores. However, we argue that it is equally important to ensure that metrics treat all systems fairly and consistently. In this paper, we introduce a method to evaluate this aspect."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="von-daniken-etal-2025-measure">
<titleInfo>
<title>A Measure of the System Dependence of Automated Metrics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pius</namePart>
<namePart type="family">Von Däniken</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="given">Milan</namePart>
<namePart type="family">Deriu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Cieliebak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-252-7</identifier>
</relatedItem>
<abstract>Automated metrics for Machine Translation have made significant progress, with the goal of replacing expensive and time-consuming human evaluations. These metrics are typically assessed by their correlation with human judgments, which captures the monotonic relationship between human and metric scores. However, we argue that it is equally important to ensure that metrics treat all systems fairly and consistently. In this paper, we introduce a method to evaluate this aspect.</abstract>
<identifier type="citekey">von-daniken-etal-2025-measure</identifier>
<identifier type="doi">10.18653/v1/2025.acl-short.8</identifier>
<location>
<url>https://aclanthology.org/2025.acl-short.8/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>87</start>
<end>99</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Measure of the System Dependence of Automated Metrics
%A Von Däniken, Pius
%A Deriu, Jan Milan
%A Cieliebak, Mark
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-252-7
%F von-daniken-etal-2025-measure
%X Automated metrics for Machine Translation have made significant progress, with the goal of replacing expensive and time-consuming human evaluations. These metrics are typically assessed by their correlation with human judgments, which captures the monotonic relationship between human and metric scores. However, we argue that it is equally important to ensure that metrics treat all systems fairly and consistently. In this paper, we introduce a method to evaluate this aspect.
%R 10.18653/v1/2025.acl-short.8
%U https://aclanthology.org/2025.acl-short.8/
%U https://doi.org/10.18653/v1/2025.acl-short.8
%P 87-99
Markdown (Informal)
[A Measure of the System Dependence of Automated Metrics](https://aclanthology.org/2025.acl-short.8/) (Von Däniken et al., ACL 2025)
ACL
- Pius Von Däniken, Jan Milan Deriu, and Mark Cieliebak. 2025. A Measure of the System Dependence of Automated Metrics. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), pages 87–99, Vienna, Austria. Association for Computational Linguistics.