@inproceedings{karpachev-etal-2025-yandex,
title = "{Y}andex Submission to the {WMT}25 General Machine Translation Task",
author = "Karpachev, Nikolay and
Enikeeva, Ekaterina and
Popov, Dmitry and
Bulgakov, Arsenii and
Panteleev, Daniil and
Ulianov, Dmitrii and
Kryukov, Artem and
Mekhraliev, Artem",
editor = "Haddow, Barry and
Kocmi, Tom and
Koehn, Philipp and
Monz, Christof",
booktitle = "Proceedings of the Tenth Conference on Machine Translation",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.wmt-1.50/",
pages = "740--752",
ISBN = "979-8-89176-341-8",
abstract = "This paper describes Yandex submission to the WMT25 General Machine Translation task. We participate in English-to-Russian translation direction and propose a purely LLM-based translation model. Our training procedure comprises a training pipeline of several stages built upon YandexGPT, an in-house general-purpose LLM. In particular, firstly, we employ continual pretraining (post-pretrain) for MT task for initial adaptation to multilinguality and translation. Subsequently, we use SFT on parallel document-level corpus in the form of P-Tuning. Following SFT, we propose a novel alignment scheme of two stages, the first one being a curriculum learning with difficulty schedule and a second one - training the model for tag preservation and error correction with human post-edits as training samples. Our model achieves results comparable to human reference translations on multiple domains."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="karpachev-etal-2025-yandex">
<titleInfo>
<title>Yandex Submission to the WMT25 General Machine Translation Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikolay</namePart>
<namePart type="family">Karpachev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Enikeeva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dmitry</namePart>
<namePart type="family">Popov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arsenii</namePart>
<namePart type="family">Bulgakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniil</namePart>
<namePart type="family">Panteleev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dmitrii</namePart>
<namePart type="family">Ulianov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Artem</namePart>
<namePart type="family">Kryukov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Artem</namePart>
<namePart type="family">Mekhraliev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth Conference on Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Barry</namePart>
<namePart type="family">Haddow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Kocmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christof</namePart>
<namePart type="family">Monz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-341-8</identifier>
</relatedItem>
<abstract>This paper describes Yandex submission to the WMT25 General Machine Translation task. We participate in English-to-Russian translation direction and propose a purely LLM-based translation model. Our training procedure comprises a training pipeline of several stages built upon YandexGPT, an in-house general-purpose LLM. In particular, firstly, we employ continual pretraining (post-pretrain) for MT task for initial adaptation to multilinguality and translation. Subsequently, we use SFT on parallel document-level corpus in the form of P-Tuning. Following SFT, we propose a novel alignment scheme of two stages, the first one being a curriculum learning with difficulty schedule and a second one - training the model for tag preservation and error correction with human post-edits as training samples. Our model achieves results comparable to human reference translations on multiple domains.</abstract>
<identifier type="citekey">karpachev-etal-2025-yandex</identifier>
<location>
<url>https://aclanthology.org/2025.wmt-1.50/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>740</start>
<end>752</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Yandex Submission to the WMT25 General Machine Translation Task
%A Karpachev, Nikolay
%A Enikeeva, Ekaterina
%A Popov, Dmitry
%A Bulgakov, Arsenii
%A Panteleev, Daniil
%A Ulianov, Dmitrii
%A Kryukov, Artem
%A Mekhraliev, Artem
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Koehn, Philipp
%Y Monz, Christof
%S Proceedings of the Tenth Conference on Machine Translation
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-341-8
%F karpachev-etal-2025-yandex
%X This paper describes Yandex submission to the WMT25 General Machine Translation task. We participate in English-to-Russian translation direction and propose a purely LLM-based translation model. Our training procedure comprises a training pipeline of several stages built upon YandexGPT, an in-house general-purpose LLM. In particular, firstly, we employ continual pretraining (post-pretrain) for MT task for initial adaptation to multilinguality and translation. Subsequently, we use SFT on parallel document-level corpus in the form of P-Tuning. Following SFT, we propose a novel alignment scheme of two stages, the first one being a curriculum learning with difficulty schedule and a second one - training the model for tag preservation and error correction with human post-edits as training samples. Our model achieves results comparable to human reference translations on multiple domains.
%U https://aclanthology.org/2025.wmt-1.50/
%P 740-752
Markdown (Informal)
[Yandex Submission to the WMT25 General Machine Translation Task](https://aclanthology.org/2025.wmt-1.50/) (Karpachev et al., WMT 2025)
ACL
- Nikolay Karpachev, Ekaterina Enikeeva, Dmitry Popov, Arsenii Bulgakov, Daniil Panteleev, Dmitrii Ulianov, Artem Kryukov, and Artem Mekhraliev. 2025. Yandex Submission to the WMT25 General Machine Translation Task. In Proceedings of the Tenth Conference on Machine Translation, pages 740–752, Suzhou, China. Association for Computational Linguistics.