@inproceedings{de-gibert-etal-2024-hybrid,
title = "Hybrid Distillation from {RBMT} and {NMT}: {H}elsinki-{NLP}{'}s Submission to the Shared Task on Translation into Low-Resource Languages of {S}pain",
author = {De Gibert, Ona and
Aulamo, Mikko and
Scherrer, Yves and
Tiedemann, J{\"o}rg},
editor = "Haddow, Barry and
Kocmi, Tom and
Koehn, Philipp and
Monz, Christof",
booktitle = "Proceedings of the Ninth Conference on Machine Translation",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.wmt-1.88",
pages = "908--917",
abstract = "The Helsinki-NLP team participated in the 2024 Shared Task on Translation into Low-Resource languages of Spain with four multilingual systems covering all language pairs. The task consists in developing Machine Translation (MT) models to translate from Spanish into Aragonese, Aranese and Asturian. Our models leverage known approaches for multilingual MT, namely, data filtering, fine-tuning, data tagging, and distillation. We use distillation to merge the knowledge from neural and rule-based systems and explore the trade-offs between translation quality and computational efficiency. We demonstrate that our distilled models can achieve competitive results while significantly reducing computational costs. Our best models ranked 4th, 5th, and 2nd in the open submission track for Spanish{--}Aragonese, Spanish{--}Aranese, and Spanish{--}Asturian, respectively. We release our code and data publicly at https://github.com/Helsinki-NLP/lowres-spain-st.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="de-gibert-etal-2024-hybrid">
<titleInfo>
<title>Hybrid Distillation from RBMT and NMT: Helsinki-NLP’s Submission to the Shared Task on Translation into Low-Resource Languages of Spain</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ona</namePart>
<namePart type="family">De Gibert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mikko</namePart>
<namePart type="family">Aulamo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yves</namePart>
<namePart type="family">Scherrer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth Conference on Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Barry</namePart>
<namePart type="family">Haddow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Kocmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christof</namePart>
<namePart type="family">Monz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Helsinki-NLP team participated in the 2024 Shared Task on Translation into Low-Resource languages of Spain with four multilingual systems covering all language pairs. The task consists in developing Machine Translation (MT) models to translate from Spanish into Aragonese, Aranese and Asturian. Our models leverage known approaches for multilingual MT, namely, data filtering, fine-tuning, data tagging, and distillation. We use distillation to merge the knowledge from neural and rule-based systems and explore the trade-offs between translation quality and computational efficiency. We demonstrate that our distilled models can achieve competitive results while significantly reducing computational costs. Our best models ranked 4th, 5th, and 2nd in the open submission track for Spanish–Aragonese, Spanish–Aranese, and Spanish–Asturian, respectively. We release our code and data publicly at https://github.com/Helsinki-NLP/lowres-spain-st.</abstract>
<identifier type="citekey">de-gibert-etal-2024-hybrid</identifier>
<location>
<url>https://aclanthology.org/2024.wmt-1.88</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>908</start>
<end>917</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Hybrid Distillation from RBMT and NMT: Helsinki-NLP’s Submission to the Shared Task on Translation into Low-Resource Languages of Spain
%A De Gibert, Ona
%A Aulamo, Mikko
%A Scherrer, Yves
%A Tiedemann, Jörg
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Koehn, Philipp
%Y Monz, Christof
%S Proceedings of the Ninth Conference on Machine Translation
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F de-gibert-etal-2024-hybrid
%X The Helsinki-NLP team participated in the 2024 Shared Task on Translation into Low-Resource languages of Spain with four multilingual systems covering all language pairs. The task consists in developing Machine Translation (MT) models to translate from Spanish into Aragonese, Aranese and Asturian. Our models leverage known approaches for multilingual MT, namely, data filtering, fine-tuning, data tagging, and distillation. We use distillation to merge the knowledge from neural and rule-based systems and explore the trade-offs between translation quality and computational efficiency. We demonstrate that our distilled models can achieve competitive results while significantly reducing computational costs. Our best models ranked 4th, 5th, and 2nd in the open submission track for Spanish–Aragonese, Spanish–Aranese, and Spanish–Asturian, respectively. We release our code and data publicly at https://github.com/Helsinki-NLP/lowres-spain-st.
%U https://aclanthology.org/2024.wmt-1.88
%P 908-917
Markdown (Informal)
[Hybrid Distillation from RBMT and NMT: Helsinki-NLP’s Submission to the Shared Task on Translation into Low-Resource Languages of Spain](https://aclanthology.org/2024.wmt-1.88) (De Gibert et al., WMT 2024)
ACL