% WMT 2025 shared-task paper (ACL Anthology ID 2025.wmt-1.68).
% In the abstract, `~` is a LaTeX tie (non-breaking space) between "Task" and its number.
@inproceedings{hrabal-etal-2025-cuni,
    title = "{CUNI} and Phrase at {WMT}25 {MT} Evaluation Task",
    author = "Hrabal, Miroslav and
      Glembek, Ondrej and
      Tamchyna, Ale{\v{s}} and
      Hildebrand, Almut Silja and
      Eckhard, Alan and
      {\v{S}}tola, Miroslav and
      Penkale, Sergio and
      {\v{S}}ime{\v{c}}kov{\'a}, Zuzana and
      Bojar, Ond{\v{r}}ej and
      Lavie, Alon and
      Stewart, Craig",
    editor = "Haddow, Barry and
      Kocmi, Tom and
      Koehn, Philipp and
      Monz, Christof",
    booktitle = "Proceedings of the Tenth Conference on Machine Translation",
    month = nov,
    year = "2025",
    address = "Suzhou, China",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.wmt-1.68/",
    pages = "934--944",
    isbn = "979-8-89176-341-8",
    abstract = "This paper describes the joint effort of Phrase a.s. and Charles University{'}s Institute of Formal and Applied Linguistics (CUNI/UFAL) on the WMT25 Automated Translation Quality Evaluation Systems Shared Task. Both teams participated both in a collaborative and competitive manner, i.e. they each submitted a system of their own as well as a contrastive joint system ensemble. In Task~1, we show that such an ensembling{---}if chosen in a clever way{---}can lead to a performance boost. We present the analysis of various kinds of systems comprising both ``traditional'' NN-based approach, as well as different flavours of LLMs{---}off-the-shelf commercial models, their fine-tuned versions, but also in-house, custom-trained alternative models. In Tasks~2 and~3 we show Phrase{'}s approach to tackling the tasks via various GPT models: Error Span Annotation via the complete MQM solution using non-reasoning models (including fine-tuned versions) in Task~2, and using reasoning models in Task~3."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hrabal-etal-2025-cuni">
<titleInfo>
<title>CUNI and Phrase at WMT25 MT Evaluation Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Miroslav</namePart>
<namePart type="family">Hrabal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ondrej</namePart>
<namePart type="family">Glembek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aleš</namePart>
<namePart type="family">Tamchyna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Almut</namePart>
<namePart type="given">Silja</namePart>
<namePart type="family">Hildebrand</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Eckhard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miroslav</namePart>
<namePart type="family">Štola</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sergio</namePart>
<namePart type="family">Penkale</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zuzana</namePart>
<namePart type="family">Šimečková</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ondřej</namePart>
<namePart type="family">Bojar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alon</namePart>
<namePart type="family">Lavie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Craig</namePart>
<namePart type="family">Stewart</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth Conference on Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Barry</namePart>
<namePart type="family">Haddow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Kocmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christof</namePart>
<namePart type="family">Monz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-341-8</identifier>
</relatedItem>
<abstract>This paper describes the joint effort of Phrase a.s. and Charles University’s Institute of Formal and Applied Linguistics (CUNI/UFAL) on the WMT25 Automated Translation Quality Evaluation Systems Shared Task. Both teams participated both in a collaborative and competitive manner, i.e. they each submitted a system of their own as well as a contrastive joint system ensemble. In Task 1, we show that such an ensembling—if chosen in a clever way—can lead to a performance boost. We present the analysis of various kinds of systems comprising both “traditional” NN-based approach, as well as different flavours of LLMs—off-the-shelf commercial models, their fine-tuned versions, but also in-house, custom-trained alternative models. In Tasks 2 and 3 we show Phrase’s approach to tackling the tasks via various GPT models: Error Span Annotation via the complete MQM solution using non-reasoning models (including fine-tuned versions) in Task 2, and using reasoning models in Task 3.</abstract>
<identifier type="citekey">hrabal-etal-2025-cuni</identifier>
<location>
<url>https://aclanthology.org/2025.wmt-1.68/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>934</start>
<end>944</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CUNI and Phrase at WMT25 MT Evaluation Task
%A Hrabal, Miroslav
%A Glembek, Ondrej
%A Tamchyna, Aleš
%A Hildebrand, Almut Silja
%A Eckhard, Alan
%A Štola, Miroslav
%A Penkale, Sergio
%A Šimečková, Zuzana
%A Bojar, Ondřej
%A Lavie, Alon
%A Stewart, Craig
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Koehn, Philipp
%Y Monz, Christof
%S Proceedings of the Tenth Conference on Machine Translation
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-341-8
%F hrabal-etal-2025-cuni
%X This paper describes the joint effort of Phrase a.s. and Charles University’s Institute of Formal and Applied Linguistics (CUNI/UFAL) on the WMT25 Automated Translation Quality Evaluation Systems Shared Task. Both teams participated both in a collaborative and competitive manner, i.e. they each submitted a system of their own as well as a contrastive joint system ensemble. In Task 1, we show that such an ensembling—if chosen in a clever way—can lead to a performance boost. We present the analysis of various kinds of systems comprising both “traditional” NN-based approach, as well as different flavours of LLMs—off-the-shelf commercial models, their fine-tuned versions, but also in-house, custom-trained alternative models. In Tasks 2 and 3 we show Phrase’s approach to tackling the tasks via various GPT models: Error Span Annotation via the complete MQM solution using non-reasoning models (including fine-tuned versions) in Task 2, and using reasoning models in Task 3.
%U https://aclanthology.org/2025.wmt-1.68/
%P 934-944
Markdown (Informal)
[CUNI and Phrase at WMT25 MT Evaluation Task](https://aclanthology.org/2025.wmt-1.68/) (Hrabal et al., WMT 2025)
ACL
- Miroslav Hrabal, Ondrej Glembek, Aleš Tamchyna, Almut Silja Hildebrand, Alan Eckhard, Miroslav Štola, Sergio Penkale, Zuzana Šimečková, Ondřej Bojar, Alon Lavie, and Craig Stewart. 2025. CUNI and Phrase at WMT25 MT Evaluation Task. In Proceedings of the Tenth Conference on Machine Translation, pages 934–944, Suzhou, China. Association for Computational Linguistics.