@inproceedings{vajda-etal-2025-improving,
title = "Improving {LLM}s for Machine Translation Using Synthetic Preference Data",
author = "Vajda, Dario and
Vre{\v{s}}, Domen and
{\v{S}}ikonja, Marko Robnik",
editor = "Cardoso, Henrique Lopes and
Sousa-Silva, Rui and
Koponen, Maarit and
Pareja-Lora, Antonio",
booktitle = "Proceedings of the 2nd LUHME Workshop",
month = oct,
year = "2025",
address = "Bologna, Italy",
publisher = "LUHME",
url = "https://aclanthology.org/2025.luhme-1.7/",
pages = "67--73",
abstract = "Large language models have emerged as effective machine translation systems. In this paper, we explore how a general instruction-tuned large language model can be improved for machine translation using relatively few easily produced data resources. Using Slovene as a use case, we improve the GaMS-9B-Instruct model using Direct Preference Optimization (DPO) training on a programmatically curated and enhanced subset of a public dataset. As DPO requires pairs of quality-ranked instances, we generated its training dataset by translating English Wikipedia articles using two LLMs, GaMS-9B-Instruct and EuroLLM-9B-Instruct. We ranked the resulting translations based on heuristics coupled with automatic evaluation metrics such as COMET. The evaluation shows that our fine-tuned model outperforms both models involved in the dataset generation. In comparison to the baseline models, the fine-tuned model achieved a COMET score gain of around 0.04 and 0.02, respectively, on translating Wikipedia articles. It also more consistently avoids language and formatting errors."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vajda-etal-2025-improving">
<titleInfo>
<title>Improving LLMs for Machine Translation Using Synthetic Preference Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dario</namePart>
<namePart type="family">Vajda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Domen</namePart>
<namePart type="family">Vreš</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="given">Robnik</namePart>
<namePart type="family">Šikonja</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd LUHME Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Henrique</namePart>
<namePart type="given">Lopes</namePart>
<namePart type="family">Cardoso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rui</namePart>
<namePart type="family">Sousa-Silva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maarit</namePart>
<namePart type="family">Koponen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonio</namePart>
<namePart type="family">Pareja-Lora</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>LUHME</publisher>
<place>
<placeTerm type="text">Bologna, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large language models have emerged as effective machine translation systems. In this paper, we explore how a general instruction-tuned large language model can be improved for machine translation using relatively few easily produced data resources. Using Slovene as a use case, we improve the GaMS-9B-Instruct model using Direct Preference Optimization (DPO) training on a programmatically curated and enhanced subset of a public dataset. As DPO requires pairs of quality-ranked instances, we generated its training dataset by translating English Wikipedia articles using two LLMs, GaMS-9B-Instruct and EuroLLM-9B-Instruct. We ranked the resulting translations based on heuristics coupled with automatic evaluation metrics such as COMET. The evaluation shows that our fine-tuned model outperforms both models involved in the dataset generation. In comparison to the baseline models, the fine-tuned model achieved a COMET score gain of around 0.04 and 0.02, respectively, on translating Wikipedia articles. It also more consistently avoids language and formatting errors.</abstract>
<identifier type="citekey">vajda-etal-2025-improving</identifier>
<location>
<url>https://aclanthology.org/2025.luhme-1.7/</url>
</location>
<part>
<date>2025-10</date>
<extent unit="page">
<start>67</start>
<end>73</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Improving LLMs for Machine Translation Using Synthetic Preference Data
%A Vajda, Dario
%A Vreš, Domen
%A Šikonja, Marko Robnik
%Y Cardoso, Henrique Lopes
%Y Sousa-Silva, Rui
%Y Koponen, Maarit
%Y Pareja-Lora, Antonio
%S Proceedings of the 2nd LUHME Workshop
%D 2025
%8 October
%I LUHME
%C Bologna, Italy
%F vajda-etal-2025-improving
%X Large language models have emerged as effective machine translation systems. In this paper, we explore how a general instruction-tuned large language model can be improved for machine translation using relatively few easily produced data resources. Using Slovene as a use case, we improve the GaMS-9B-Instruct model using Direct Preference Optimization (DPO) training on a programmatically curated and enhanced subset of a public dataset. As DPO requires pairs of quality-ranked instances, we generated its training dataset by translating English Wikipedia articles using two LLMs, GaMS-9B-Instruct and EuroLLM-9B-Instruct. We ranked the resulting translations based on heuristics coupled with automatic evaluation metrics such as COMET. The evaluation shows that our fine-tuned model outperforms both models involved in the dataset generation. In comparison to the baseline models, the fine-tuned model achieved a COMET score gain of around 0.04 and 0.02, respectively, on translating Wikipedia articles. It also more consistently avoids language and formatting errors.
%U https://aclanthology.org/2025.luhme-1.7/
%P 67-73
Markdown (Informal)
[Improving LLMs for Machine Translation Using Synthetic Preference Data](https://aclanthology.org/2025.luhme-1.7/) (Vajda et al., LUHME 2025)
ACL