@inproceedings{roy-2025-codeanubad,
title = "{C}ode{A}nubad at {BLP}-2025 Task 2: Efficient {B}angla-to-Python Code Generation via Iterative {L}o{RA} Fine-Tuning of Gemma-2",
author = "Roy, Soumyajit",
editor = "Alam, Firoj and
Kar, Sudipta and
Chowdhury, Shammur Absar and
Hassan, Naeemul and
Prince, Enamul Hoque and
Tasnim, Mohiuddin and
Rony, Md Rashad Al Hasan and
Rahman, Md Tahmid Rahman",
booktitle = "Proceedings of the Second Workshop on Bangla Language Processing (BLP-2025)",
month = dec,
year = "2025",
address = "Mumbai, India",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.banglalp-1.53/",
pages = "556--560",
ISBN = "979-8-89176-314-2",
abstract = "This paper presents our submission for Task 2 of the Bangla Language Processing (BLP) Workshop, which focuses on generating Python code from Bangla programming prompts in a low-resource setting. We address this challenge by fine-tuning the gemma-2-9b instruction-tuned model using parameter-efficient fine-tuning (PEFT) with QLoRA. We propose an iterative self-improvement strategy that augments the extremely limited training data (74 examples) by reusing verified correct predictions from the development set, alongside LoRA rank experiments (8, 16, 32), observing a clear correlation between rank and accuracy, with rank 32 delivering the best results. Compared to translation-based and retrieval-augmented baselines, our approach achieves significantly higher accuracy, with a pass rate of 47{\%} on the development set and 37{\%} on the hidden test set. These results highlight the effectiveness of combining iterative data augmentation with rank optimisation for specialised, low-resource code generation tasks."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="roy-2025-codeanubad">
<titleInfo>
<title>CodeAnubad at BLP-2025 Task 2: Efficient Bangla-to-Python Code Generation via Iterative LoRA Fine-Tuning of Gemma-2</title>
</titleInfo>
<name type="personal">
<namePart type="given">Soumyajit</namePart>
<namePart type="family">Roy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Bangla Language Processing (BLP-2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Firoj</namePart>
<namePart type="family">Alam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sudipta</namePart>
<namePart type="family">Kar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shammur</namePart>
<namePart type="given">Absar</namePart>
<namePart type="family">Chowdhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naeemul</namePart>
<namePart type="family">Hassan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enamul</namePart>
<namePart type="given">Hoque</namePart>
<namePart type="family">Prince</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohiuddin</namePart>
<namePart type="family">Tasnim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Md</namePart>
<namePart type="given">Rashad</namePart>
<namePart type="given">Al</namePart>
<namePart type="given">Hasan</namePart>
<namePart type="family">Rony</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Md</namePart>
<namePart type="given">Tahmid</namePart>
<namePart type="given">Rahman</namePart>
<namePart type="family">Rahman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mumbai, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-314-2</identifier>
</relatedItem>
<abstract>This paper presents our submission for Task 2 of the Bangla Language Processing (BLP) Workshop, which focuses on generating Python code from Bangla programming prompts in a low-resource setting. We address this challenge by fine-tuning the gemma-2-9b instruction-tuned model using parameter-efficient fine-tuning (PEFT) with QLoRA. We propose an iterative self-improvement strategy that augments the extremely limited training data (74 examples) by reusing verified correct predictions from the development set, alongside LoRA rank experiments (8, 16, 32), observing a clear correlation between rank and accuracy, with rank 32 delivering the best results. Compared to translation-based and retrieval-augmented baselines, our approach achieves significantly higher accuracy, with a pass rate of 47% on the development set and 37% on the hidden test set. These results highlight the effectiveness of combining iterative data augmentation with rank optimisation for specialised, low-resource code generation tasks.</abstract>
<identifier type="citekey">roy-2025-codeanubad</identifier>
<location>
<url>https://aclanthology.org/2025.banglalp-1.53/</url>
</location>
<part>
<date>2025-12</date>
<extent unit="page">
<start>556</start>
<end>560</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CodeAnubad at BLP-2025 Task 2: Efficient Bangla-to-Python Code Generation via Iterative LoRA Fine-Tuning of Gemma-2
%A Roy, Soumyajit
%Y Alam, Firoj
%Y Kar, Sudipta
%Y Chowdhury, Shammur Absar
%Y Hassan, Naeemul
%Y Prince, Enamul Hoque
%Y Tasnim, Mohiuddin
%Y Rony, Md Rashad Al Hasan
%Y Rahman, Md Tahmid Rahman
%S Proceedings of the Second Workshop on Bangla Language Processing (BLP-2025)
%D 2025
%8 December
%I Association for Computational Linguistics
%C Mumbai, India
%@ 979-8-89176-314-2
%F roy-2025-codeanubad
%X This paper presents our submission for Task 2 of the Bangla Language Processing (BLP) Workshop, which focuses on generating Python code from Bangla programming prompts in a low-resource setting. We address this challenge by fine-tuning the gemma-2-9b instruction-tuned model using parameter-efficient fine-tuning (PEFT) with QLoRA. We propose an iterative self-improvement strategy that augments the extremely limited training data (74 examples) by reusing verified correct predictions from the development set, alongside LoRA rank experiments (8, 16, 32), observing a clear correlation between rank and accuracy, with rank 32 delivering the best results. Compared to translation-based and retrieval-augmented baselines, our approach achieves significantly higher accuracy, with a pass rate of 47% on the development set and 37% on the hidden test set. These results highlight the effectiveness of combining iterative data augmentation with rank optimisation for specialised, low-resource code generation tasks.
%U https://aclanthology.org/2025.banglalp-1.53/
%P 556-560
Markdown (Informal)
[CodeAnubad at BLP-2025 Task 2: Efficient Bangla-to-Python Code Generation via Iterative LoRA Fine-Tuning of Gemma-2](https://aclanthology.org/2025.banglalp-1.53/) (Roy, BanglaLP 2025)
ACL