@inproceedings{dewan-rifat-2025-pybhasha,
title = "{P}y{B}hasha at {BLP}-2025 Task 2: Effectiveness of Semantic-Aware Translation and Ensembling in {B}angla Code Generation",
author = "Dewan, Foyez Ahmed and
Rifat, Nahid Montasir",
editor = "Alam, Firoj and
Kar, Sudipta and
Chowdhury, Shammur Absar and
Hassan, Naeemul and
Prince, Enamul Hoque and
Tasnim, Mohiuddin and
Rony, Md Rashad Al Hasan and
Rahman, Md Tahmid Rahman",
booktitle = "Proceedings of the Second Workshop on Bangla Language Processing (BLP-2025)",
month = dec,
year = "2025",
address = "Mumbai, India",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.banglalp-1.64/",
pages = "624--628",
ISBN = "979-8-89176-314-2",
abstract = "In this paper, we present our submission to Task 2 of the BLP-2025 shared task on code generation from Bangla instructions. Our approach focused on enhancing instruction quality through translation and improving model performance with a two-stage ensemble strategy. We evaluated two proprietary and several open-source models under three instruction settings: original Bangla instructions, Bangla instructions translated into English using Facebook NLLB, and instructions rewritten in English with GPT-4.1. Experimental results showed that GPT-4.1-rewritten instructions consistently achieved the highest accuracy across models. For final predictions, we used a two-stage ensemble, achieving a $\textit{pass@1}$ score of $\textbf{80.0\%}$ on the hidden test set and securing 12th place on the official leaderboard. Additionally, we conducted a qualitative analysis of selected translations to illustrate how variations in instruction phrasing influenced model outputs."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dewan-rifat-2025-pybhasha">
<titleInfo>
<title>PyBhasha at BLP-2025 Task 2: Effectiveness of Semantic-Aware Translation and Ensembling in Bangla Code Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Foyez</namePart>
<namePart type="given">Ahmed</namePart>
<namePart type="family">Dewan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nahid</namePart>
<namePart type="given">Montasir</namePart>
<namePart type="family">Rifat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Bangla Language Processing (BLP-2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Firoj</namePart>
<namePart type="family">Alam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sudipta</namePart>
<namePart type="family">Kar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shammur</namePart>
<namePart type="given">Absar</namePart>
<namePart type="family">Chowdhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naeemul</namePart>
<namePart type="family">Hassan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enamul</namePart>
<namePart type="given">Hoque</namePart>
<namePart type="family">Prince</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohiuddin</namePart>
<namePart type="family">Tasnim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Md</namePart>
<namePart type="given">Rashad</namePart>
<namePart type="given">Al</namePart>
<namePart type="given">Hasan</namePart>
<namePart type="family">Rony</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Md</namePart>
<namePart type="given">Tahmid</namePart>
<namePart type="given">Rahman</namePart>
<namePart type="family">Rahman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mumbai, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-314-2</identifier>
</relatedItem>
<abstract>In this paper, we present our submission to Task 2 of the BLP-2025 shared task on code generation from Bangla instructions. Our approach focused on enhancing instruction quality through translation and improving model performance with a two-stage ensemble strategy. We evaluated two proprietary and several open-source models under three instruction settings: original Bangla instructions, Bangla instructions translated into English using Facebook NLLB, and instructions rewritten in English with GPT-4.1. Experimental results showed that GPT-4.1-rewritten instructions consistently achieved the highest accuracy across models. For final predictions, we used a two-stage ensemble, achieving a pass@1 score of 80.0% on the hidden test set and securing 12th place on the official leaderboard. Additionally, we conducted a qualitative analysis of selected translations to illustrate how variations in instruction phrasing influenced model outputs.</abstract>
<identifier type="citekey">dewan-rifat-2025-pybhasha</identifier>
<location>
<url>https://aclanthology.org/2025.banglalp-1.64/</url>
</location>
<part>
<date>2025-12</date>
<extent unit="page">
<start>624</start>
<end>628</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T PyBhasha at BLP-2025 Task 2: Effectiveness of Semantic-Aware Translation and Ensembling in Bangla Code Generation
%A Dewan, Foyez Ahmed
%A Rifat, Nahid Montasir
%Y Alam, Firoj
%Y Kar, Sudipta
%Y Chowdhury, Shammur Absar
%Y Hassan, Naeemul
%Y Prince, Enamul Hoque
%Y Tasnim, Mohiuddin
%Y Rony, Md Rashad Al Hasan
%Y Rahman, Md Tahmid Rahman
%S Proceedings of the Second Workshop on Bangla Language Processing (BLP-2025)
%D 2025
%8 December
%I Association for Computational Linguistics
%C Mumbai, India
%@ 979-8-89176-314-2
%F dewan-rifat-2025-pybhasha
%X In this paper, we present our submission to Task 2 of the BLP-2025 shared task on code generation from Bangla instructions. Our approach focused on enhancing instruction quality through translation and improving model performance with a two-stage ensemble strategy. We evaluated two proprietary and several open-source models under three instruction settings: original Bangla instructions, Bangla instructions translated into English using Facebook NLLB, and instructions rewritten in English with GPT-4.1. Experimental results showed that GPT-4.1-rewritten instructions consistently achieved the highest accuracy across models. For final predictions, we used a two-stage ensemble, achieving a pass@1 score of 80.0% on the hidden test set and securing 12th place on the official leaderboard. Additionally, we conducted a qualitative analysis of selected translations to illustrate how variations in instruction phrasing influenced model outputs.
%U https://aclanthology.org/2025.banglalp-1.64/
%P 624-628
Markdown (Informal)
[PyBhasha at BLP-2025 Task 2: Effectiveness of Semantic-Aware Translation and Ensembling in Bangla Code Generation](https://aclanthology.org/2025.banglalp-1.64/) (Dewan & Rifat, BanglaLP 2025)
ACL