@inproceedings{ponce-etal-2024-vicomtech,
title = "Vicomtech@{WMT} 2024: Shared Task on Translation into Low-Resource Languages of {S}pain",
author = "Ponce, David and
Gete, Harritxu and
Etchegoyhen, Thierry",
editor = "Haddow, Barry and
Kocmi, Tom and
Koehn, Philipp and
Monz, Christof",
booktitle = "Proceedings of the Ninth Conference on Machine Translation",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.wmt-1.91",
pages = "934--942",
abstract = "We describe Vicomtech{'}s participation in the WMT 2024 Shared Task on translation into low-resource languages of Spain. We addressed all three languages of the task, namely Aragonese, Aranese and Asturian, in both constrained and open settings. Our work mainly centred on exploiting different types of corpora via data filtering, selection and combination methods, along with synthetic data generated with translation models based on rules, neural sequence-to-sequence or large language models. We improved or matched the best baselines in all three language pairs and present complementary results on additional test sets.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ponce-etal-2024-vicomtech">
<titleInfo>
<title>Vicomtech@WMT 2024: Shared Task on Translation into Low-Resource Languages of Spain</title>
</titleInfo>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Ponce</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harritxu</namePart>
<namePart type="family">Gete</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Etchegoyhen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth Conference on Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Barry</namePart>
<namePart type="family">Haddow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Kocmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christof</namePart>
<namePart type="family">Monz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We describe Vicomtech’s participation in the WMT 2024 Shared Task on translation into low-resource languages of Spain. We addressed all three languages of the task, namely Aragonese, Aranese and Asturian, in both constrained and open settings. Our work mainly centred on exploiting different types of corpora via data filtering, selection and combination methods, along with synthetic data generated with translation models based on rules, neural sequence-to-sequence or large language models. We improved or matched the best baselines in all three language pairs and present complementary results on additional test sets.</abstract>
<identifier type="citekey">ponce-etal-2024-vicomtech</identifier>
<location>
<url>https://aclanthology.org/2024.wmt-1.91</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>934</start>
<end>942</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Vicomtech@WMT 2024: Shared Task on Translation into Low-Resource Languages of Spain
%A Ponce, David
%A Gete, Harritxu
%A Etchegoyhen, Thierry
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Koehn, Philipp
%Y Monz, Christof
%S Proceedings of the Ninth Conference on Machine Translation
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F ponce-etal-2024-vicomtech
%X We describe Vicomtech’s participation in the WMT 2024 Shared Task on translation into low-resource languages of Spain. We addressed all three languages of the task, namely Aragonese, Aranese and Asturian, in both constrained and open settings. Our work mainly centred on exploiting different types of corpora via data filtering, selection and combination methods, along with synthetic data generated with translation models based on rules, neural sequence-to-sequence or large language models. We improved or matched the best baselines in all three language pairs and present complementary results on additional test sets.
%U https://aclanthology.org/2024.wmt-1.91
%P 934-942
Markdown (Informal)
[Vicomtech@WMT 2024: Shared Task on Translation into Low-Resource Languages of Spain](https://aclanthology.org/2024.wmt-1.91) (Ponce et al., WMT 2024)
ACL