@inproceedings{haq-etal-2025-long,
title = "Long-context Reference-based {MT} Quality Estimation",
author = "Haq, Sami and
Osuji, Chinonso and
Castilho, Sheila and
Davis, Brian and
Castro Ferreira, Thiago",
editor = "Haddow, Barry and
Kocmi, Tom and
Koehn, Philipp and
Monz, Christof",
booktitle = "Proceedings of the Tenth Conference on Machine Translation",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.wmt-1.64/",
pages = "905--912",
ISBN = "979-8-89176-341-8",
abstract = "In this paper, we present our submission to the Tenth Conference on Machine Translation (WMT25) Shared Task on Automated Translation Quality Evaluation. Our systems are built upon the COMET framework and trained to predict segment-level ESA scores using augmented long-context data. To construct long-context training examples, we concatenate multiple in-domain sentences and compute a weighted average of their scores. We further integrate human judgment datasets (MQM, SQM, and DA) through score normalisation and train multilingual models on the source, hypothesis, and reference translations. Experimental results demonstrate that incorporating long-context information yields higher correlations with human judgments compared to models trained exclusively on short segments."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="haq-etal-2025-long">
<titleInfo>
<title>Long-context Reference-based MT Quality Estimation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sami</namePart>
<namePart type="family">Haq</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chinonso</namePart>
<namePart type="family">Osuji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sheila</namePart>
<namePart type="family">Castilho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brian</namePart>
<namePart type="family">Davis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thiago</namePart>
<namePart type="family">Castro Ferreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth Conference on Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Barry</namePart>
<namePart type="family">Haddow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Kocmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christof</namePart>
<namePart type="family">Monz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-341-8</identifier>
</relatedItem>
<abstract>In this paper, we present our submission to the Tenth Conference on Machine Translation (WMT25) Shared Task on Automated Translation Quality Evaluation. Our systems are built upon the COMET framework and trained to predict segment-level ESA scores using augmented long-context data. To construct long-context training examples, we concatenate multiple in-domain sentences and compute a weighted average of their scores. We further integrate human judgment datasets (MQM, SQM, and DA) through score normalisation and train multilingual models on the source, hypothesis, and reference translations. Experimental results demonstrate that incorporating long-context information yields higher correlations with human judgments compared to models trained exclusively on short segments.</abstract>
<identifier type="citekey">haq-etal-2025-long</identifier>
<location>
<url>https://aclanthology.org/2025.wmt-1.64/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>905</start>
<end>912</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Long-context Reference-based MT Quality Estimation
%A Haq, Sami
%A Osuji, Chinonso
%A Castilho, Sheila
%A Davis, Brian
%A Castro Ferreira, Thiago
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Koehn, Philipp
%Y Monz, Christof
%S Proceedings of the Tenth Conference on Machine Translation
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-341-8
%F haq-etal-2025-long
%X In this paper, we present our submission to the Tenth Conference on Machine Translation (WMT25) Shared Task on Automated Translation Quality Evaluation. Our systems are built upon the COMET framework and trained to predict segment-level ESA scores using augmented long-context data. To construct long-context training examples, we concatenate multiple in-domain sentences and compute a weighted average of their scores. We further integrate human judgment datasets (MQM, SQM, and DA) through score normalisation and train multilingual models on the source, hypothesis, and reference translations. Experimental results demonstrate that incorporating long-context information yields higher correlations with human judgments compared to models trained exclusively on short segments.
%U https://aclanthology.org/2025.wmt-1.64/
%P 905-912
Markdown (Informal)
[Long-context Reference-based MT Quality Estimation](https://aclanthology.org/2025.wmt-1.64/) (Haq et al., WMT 2025)
ACL
- Sami Haq, Chinonso Osuji, Sheila Castilho, Brian Davis, and Thiago Castro Ferreira. 2025. Long-context Reference-based MT Quality Estimation. In Proceedings of the Tenth Conference on Machine Translation, pages 905–912, Suzhou, China. Association for Computational Linguistics.