@inproceedings{rajpoot-etal-2024-multimodal,
title = "Multimodal Machine Translation for Low-Resource {I}ndic Languages: A Chain-of-Thought Approach Using Large Language Models",
author = "Rajpoot, Pawan and
Bhat, Nagaraj and
Shrivastava, Ashish",
editor = "Haddow, Barry and
Kocmi, Tom and
Koehn, Philipp and
Monz, Christof",
booktitle = "Proceedings of the Ninth Conference on Machine Translation",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.wmt-1.79",
pages = "833--838",
abstract = "This paper presents the approach and results of team v036 in the English-to-Low-Resource Multi-Modal Translation Task at the Ninth Conference on Machine Translation (WMT24). Our team tackled the challenge of translating English source text to low-resource Indic languages, specifically Hindi, Malayalam, and Bengali, while leveraging visual context provided alongside the text data. We used InternVL2 for extracting the image context along with Knowledge Distillation from bigger LLMs to train Small Language Model on the tranlsation task. During current shared task phase, we submitted best models (for this task), and overall we got rank 3 on Hindi, Bengali, and Malyalam datasets. We also open source our models on huggingface.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rajpoot-etal-2024-multimodal">
<titleInfo>
<title>Multimodal Machine Translation for Low-Resource Indic Languages: A Chain-of-Thought Approach Using Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pawan</namePart>
<namePart type="family">Rajpoot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nagaraj</namePart>
<namePart type="family">Bhat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashish</namePart>
<namePart type="family">Shrivastava</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth Conference on Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Barry</namePart>
<namePart type="family">Haddow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Kocmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christof</namePart>
<namePart type="family">Monz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents the approach and results of team v036 in the English-to-Low-Resource Multi-Modal Translation Task at the Ninth Conference on Machine Translation (WMT24). Our team tackled the challenge of translating English source text into low-resource Indic languages, specifically Hindi, Malayalam, and Bengali, while leveraging the visual context provided alongside the text data. We used InternVL2 to extract image context and applied knowledge distillation from larger LLMs to train a small language model on the translation task. In the current shared task phase, we submitted our best models for this task and ranked third overall on the Hindi, Bengali, and Malayalam datasets. We also open-source our models on Hugging Face.</abstract>
<identifier type="citekey">rajpoot-etal-2024-multimodal</identifier>
<location>
<url>https://aclanthology.org/2024.wmt-1.79</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>833</start>
<end>838</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multimodal Machine Translation for Low-Resource Indic Languages: A Chain-of-Thought Approach Using Large Language Models
%A Rajpoot, Pawan
%A Bhat, Nagaraj
%A Shrivastava, Ashish
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Koehn, Philipp
%Y Monz, Christof
%S Proceedings of the Ninth Conference on Machine Translation
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F rajpoot-etal-2024-multimodal
%X This paper presents the approach and results of team v036 in the English-to-Low-Resource Multi-Modal Translation Task at the Ninth Conference on Machine Translation (WMT24). Our team tackled the challenge of translating English source text into low-resource Indic languages, specifically Hindi, Malayalam, and Bengali, while leveraging the visual context provided alongside the text data. We used InternVL2 to extract image context and applied knowledge distillation from larger LLMs to train a small language model on the translation task. In the current shared task phase, we submitted our best models for this task and ranked third overall on the Hindi, Bengali, and Malayalam datasets. We also open-source our models on Hugging Face.
%U https://aclanthology.org/2024.wmt-1.79
%P 833-838
Markdown (Informal)
[Multimodal Machine Translation for Low-Resource Indic Languages: A Chain-of-Thought Approach Using Large Language Models](https://aclanthology.org/2024.wmt-1.79) (Rajpoot et al., WMT 2024)
ACL
Pawan Rajpoot, Nagaraj Bhat, and Ashish Shrivastava. 2024. Multimodal Machine Translation for Low-Resource Indic Languages: A Chain-of-Thought Approach Using Large Language Models. In Proceedings of the Ninth Conference on Machine Translation, pages 833–838, Miami, Florida, USA. Association for Computational Linguistics.