@inproceedings{dhal-etal-2025-odiagenai,
title = "{O}dia{G}en{AI} participation at {WAT} 2025",
author = "Dhal, Debasish and
Sekhar, Sambit and
R, Revathy V and
Parida, Shantipriya and
Dhaka, Akash Kumar",
editor = "Nakazawa, Toshiaki and
Goto, Isao",
booktitle = "Proceedings of the Twelfth Workshop on Asian Translation (WAT 2025)",
month = dec,
year = "2025",
address = "Mumbai, India",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.wat-1.11/",
pages = "109--114",
ISBN = "979-8-89176-309-8",
abstract = "We at ODIAGEN provide a detailed description of the model, training procedure, results and conclusion of our submission to the Workshop on Asian Translation (WAT 2025). For this year, we focus only on text-to-text translation tasks on low-resource Indic languages, targeting Hindi, Bengali, Malayalam and Odia specifically. The system uses a large language model, NLLB-200, finetuned on large datasets consisting of over 100K rows for each targeted language. The whole training dataset is made of the data provided by the organisers, as in previous years, and augmented by a much larger 100K sentences of data subsampled from the Samanantar dataset provided by AI4Bharat. On five of the eight evaluation/challenge tests, our approach obtained the highest BLEU scores yet recorded since the tasks' inception."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dhal-etal-2025-odiagenai">
<titleInfo>
<title>OdiaGenAI participation at WAT 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Debasish</namePart>
<namePart type="family">Dhal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sambit</namePart>
<namePart type="family">Sekhar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Revathy</namePart>
<namePart type="given">V</namePart>
<namePart type="family">R</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shantipriya</namePart>
<namePart type="family">Parida</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Akash</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Dhaka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Workshop on Asian Translation (WAT 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Toshiaki</namePart>
<namePart type="family">Nakazawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isao</namePart>
<namePart type="family">Goto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mumbai, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-309-8</identifier>
</relatedItem>
<abstract>We at ODIAGEN provide a detailed description of the model, training procedure, results and conclusion of our submission to the Workshop on Asian Translation (WAT 2025). For this year, we focus only on text-to-text translation tasks on low-resource Indic languages, targeting Hindi, Bengali, Malayalam and Odia specifically. The system uses a large language model, NLLB-200, finetuned on large datasets consisting of over 100K rows for each targeted language. The whole training dataset is made of the data provided by the organisers, as in previous years, and augmented by a much larger 100K sentences of data subsampled from the Samanantar dataset provided by AI4Bharat. On five of the eight evaluation/challenge tests, our approach obtained the highest BLEU scores yet recorded since the tasks' inception.</abstract>
<identifier type="citekey">dhal-etal-2025-odiagenai</identifier>
<location>
<url>https://aclanthology.org/2025.wat-1.11/</url>
</location>
<part>
<date>2025-12</date>
<extent unit="page">
<start>109</start>
<end>114</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T OdiaGenAI participation at WAT 2025
%A Dhal, Debasish
%A Sekhar, Sambit
%A R, Revathy V.
%A Parida, Shantipriya
%A Dhaka, Akash Kumar
%Y Nakazawa, Toshiaki
%Y Goto, Isao
%S Proceedings of the Twelfth Workshop on Asian Translation (WAT 2025)
%D 2025
%8 December
%I Association for Computational Linguistics
%C Mumbai, India
%@ 979-8-89176-309-8
%F dhal-etal-2025-odiagenai
%X We at ODIAGEN provide a detailed description of the model, training procedure, results and conclusion of our submission to the Workshop on Asian Translation (WAT 2025). For this year, we focus only on text-to-text translation tasks on low-resource Indic languages, targeting Hindi, Bengali, Malayalam and Odia specifically. The system uses a large language model, NLLB-200, finetuned on large datasets consisting of over 100K rows for each targeted language. The whole training dataset is made of the data provided by the organisers, as in previous years, and augmented by a much larger 100K sentences of data subsampled from the Samanantar dataset provided by AI4Bharat. On five of the eight evaluation/challenge tests, our approach obtained the highest BLEU scores yet recorded since the tasks' inception.
%U https://aclanthology.org/2025.wat-1.11/
%P 109-114
Markdown (Informal)
[OdiaGenAI participation at WAT 2025](https://aclanthology.org/2025.wat-1.11/) (Dhal et al., WAT 2025)
ACL
- Debasish Dhal, Sambit Sekhar, Revathy V R, Shantipriya Parida, and Akash Kumar Dhaka. 2025. OdiaGenAI participation at WAT 2025. In Proceedings of the Twelfth Workshop on Asian Translation (WAT 2025), pages 109–114, Mumbai, India. Association for Computational Linguistics.