@inproceedings{gajakos-etal-2024-setu,
title = "The {SETU}-{ADAPT} Submissions to the {WMT}24 Low-Resource {I}ndic Language Translation Task",
author = "Gajakos, Neha and
Nayak, Prashanth and
Haque, Rejwanul and
Way, Andy",
editor = "Haddow, Barry and
Kocmi, Tom and
Koehn, Philipp and
Monz, Christof",
booktitle = "Proceedings of the Ninth Conference on Machine Translation",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.wmt-1.67",
pages = "762--769",
abstract = "This paper presents the SETU-ADAPT{'}s submissions to the WMT 2024 Low-Resource Indic Language Translation task. We participated in the unconstrained segment of the task, focusing on the Assamese-to-English and English-to-Assamese language pairs. Our approach involves leveraging Large Language Models (LLMs) as the baseline systems for all our MT tasks. Furthermore, we applied various strategies to improve the baseline systems. In our first approach, we fine-tuned LLMs using all the data provided by the task organisers. Our second approach explores in-context learning by focusing on few-shot prompting. In our final approach we explore an efficient data extraction technique based on a fuzzy match-based similarity measure for fine-tuning. We evaluated our systems using BLEU, chrF, WER, and COMET. The experimental results showed that our strategies can effectively improve the quality of translations in low-resource scenarios.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gajakos-etal-2024-setu">
<titleInfo>
<title>The SETU-ADAPT Submissions to the WMT24 Low-Resource Indic Language Translation Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Neha</namePart>
<namePart type="family">Gajakos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Prashanth</namePart>
<namePart type="family">Nayak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rejwanul</namePart>
<namePart type="family">Haque</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andy</namePart>
<namePart type="family">Way</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth Conference on Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Barry</namePart>
<namePart type="family">Haddow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Kocmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christof</namePart>
<namePart type="family">Monz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents the SETU-ADAPT’s submissions to the WMT 2024 Low-Resource Indic Language Translation task. We participated in the unconstrained segment of the task, focusing on the Assamese-to-English and English-to-Assamese language pairs. Our approach involves leveraging Large Language Models (LLMs) as the baseline systems for all our MT tasks. Furthermore, we applied various strategies to improve the baseline systems. In our first approach, we fine-tuned LLMs using all the data provided by the task organisers. Our second approach explores in-context learning by focusing on few-shot prompting. In our final approach we explore an efficient data extraction technique based on a fuzzy match-based similarity measure for fine-tuning. We evaluated our systems using BLEU, chrF, WER, and COMET. The experimental results showed that our strategies can effectively improve the quality of translations in low-resource scenarios.</abstract>
<identifier type="citekey">gajakos-etal-2024-setu</identifier>
<location>
<url>https://aclanthology.org/2024.wmt-1.67</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>762</start>
<end>769</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The SETU-ADAPT Submissions to the WMT24 Low-Resource Indic Language Translation Task
%A Gajakos, Neha
%A Nayak, Prashanth
%A Haque, Rejwanul
%A Way, Andy
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Koehn, Philipp
%Y Monz, Christof
%S Proceedings of the Ninth Conference on Machine Translation
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F gajakos-etal-2024-setu
%X This paper presents the SETU-ADAPT’s submissions to the WMT 2024 Low-Resource Indic Language Translation task. We participated in the unconstrained segment of the task, focusing on the Assamese-to-English and English-to-Assamese language pairs. Our approach involves leveraging Large Language Models (LLMs) as the baseline systems for all our MT tasks. Furthermore, we applied various strategies to improve the baseline systems. In our first approach, we fine-tuned LLMs using all the data provided by the task organisers. Our second approach explores in-context learning by focusing on few-shot prompting. In our final approach we explore an efficient data extraction technique based on a fuzzy match-based similarity measure for fine-tuning. We evaluated our systems using BLEU, chrF, WER, and COMET. The experimental results showed that our strategies can effectively improve the quality of translations in low-resource scenarios.
%U https://aclanthology.org/2024.wmt-1.67
%P 762-769
Markdown (Informal)
[The SETU-ADAPT Submissions to the WMT24 Low-Resource Indic Language Translation Task](https://aclanthology.org/2024.wmt-1.67) (Gajakos et al., WMT 2024)
ACL