@inproceedings{jawad-etal-2025-benchmarking,
title = "Benchmarking Large Language Models on {B}angla Dialect Translation and Dialectal Sentiment Analysis",
author = "Jawad, Md Mahir and
Ahmed, Rafid and
Apan, Ishita Sur and
Tomal, Tasnimul Hossain and
Haider, Fabiha and
Hossain, Mir Sazzat and
Bhuiyan, Md Farhad Alam",
editor = "Alam, Firoj and
Kar, Sudipta and
Chowdhury, Shammur Absar and
Hassan, Naeemul and
Prince, Enamul Hoque and
Tasnim, Mohiuddin and
Rony, Md Rashad Al Hasan and
Rahman, Md Tahmid Rahman",
booktitle = "Proceedings of the Second Workshop on Bangla Language Processing (BLP-2025)",
month = dec,
year = "2025",
address = "Mumbai, India",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.banglalp-1.26/",
pages = "322--337",
ISBN = "979-8-89176-314-2",
abstract = "We present a novel Bangla Dialect Dataset comprising 600 annotated instances across four major dialects: Chattogram, Barishal, Sylhet, and Noakhali. The dataset was constructed from YouTube comments spanning diverse domains to capture authentic dialectal variations in informal online communication. Each instance includes the original dialectical text, its standard Bangla translation, and sentiment labels (Positive and Negative). We benchmark several state-of-the-art large language models on dialect-to-standard translation and sentiment analysis tasks using zero-shot and few-shot prompting strategies. Our experiments reveal that transliteration significantly improves translation quality for closed-source models, with GPT-4o-mini achieving the highest BLEU score of 0.343 in zero-shot with transliteration. For sentiment analysis, GPT-4o-mini demonstrates perfect precision, recall, and F1 scores (1.000) in few-shot settings. This dataset addresses the critical gap in resources for low-resource Bangla dialects and provides a foundation for developing dialect-aware NLP systems."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jawad-etal-2025-benchmarking">
<titleInfo>
<title>Benchmarking Large Language Models on Bangla Dialect Translation and Dialectal Sentiment Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Md</namePart>
<namePart type="given">Mahir</namePart>
<namePart type="family">Jawad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rafid</namePart>
<namePart type="family">Ahmed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ishita</namePart>
<namePart type="given">Sur</namePart>
<namePart type="family">Apan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tasnimul</namePart>
<namePart type="given">Hossain</namePart>
<namePart type="family">Tomal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabiha</namePart>
<namePart type="family">Haider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mir</namePart>
<namePart type="given">Sazzat</namePart>
<namePart type="family">Hossain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Md</namePart>
<namePart type="given">Farhad</namePart>
<namePart type="given">Alam</namePart>
<namePart type="family">Bhuiyan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Bangla Language Processing (BLP-2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Firoj</namePart>
<namePart type="family">Alam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sudipta</namePart>
<namePart type="family">Kar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shammur</namePart>
<namePart type="given">Absar</namePart>
<namePart type="family">Chowdhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naeemul</namePart>
<namePart type="family">Hassan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enamul</namePart>
<namePart type="given">Hoque</namePart>
<namePart type="family">Prince</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohiuddin</namePart>
<namePart type="family">Tasnim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Md</namePart>
<namePart type="given">Rashad</namePart>
<namePart type="given">Al</namePart>
<namePart type="given">Hasan</namePart>
<namePart type="family">Rony</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Md</namePart>
<namePart type="given">Tahmid</namePart>
<namePart type="given">Rahman</namePart>
<namePart type="family">Rahman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mumbai, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-314-2</identifier>
</relatedItem>
<abstract>We present a novel Bangla Dialect Dataset comprising 600 annotated instances across four major dialects: Chattogram, Barishal, Sylhet, and Noakhali. The dataset was constructed from YouTube comments spanning diverse domains to capture authentic dialectal variations in informal online communication. Each instance includes the original dialectical text, its standard Bangla translation, and sentiment labels (Positive and Negative). We benchmark several state-of-the-art large language models on dialect-to-standard translation and sentiment analysis tasks using zero-shot and few-shot prompting strategies. Our experiments reveal that transliteration significantly improves translation quality for closed-source models, with GPT-4o-mini achieving the highest BLEU score of 0.343 in zero-shot with transliteration. For sentiment analysis, GPT-4o-mini demonstrates perfect precision, recall, and F1 scores (1.000) in few-shot settings. This dataset addresses the critical gap in resources for low-resource Bangla dialects and provides a foundation for developing dialect-aware NLP systems.</abstract>
<identifier type="citekey">jawad-etal-2025-benchmarking</identifier>
<location>
<url>https://aclanthology.org/2025.banglalp-1.26/</url>
</location>
<part>
<date>2025-12</date>
<extent unit="page">
<start>322</start>
<end>337</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Benchmarking Large Language Models on Bangla Dialect Translation and Dialectal Sentiment Analysis
%A Jawad, Md Mahir
%A Ahmed, Rafid
%A Apan, Ishita Sur
%A Tomal, Tasnimul Hossain
%A Haider, Fabiha
%A Hossain, Mir Sazzat
%A Bhuiyan, Md Farhad Alam
%Y Alam, Firoj
%Y Kar, Sudipta
%Y Chowdhury, Shammur Absar
%Y Hassan, Naeemul
%Y Prince, Enamul Hoque
%Y Tasnim, Mohiuddin
%Y Rony, Md Rashad Al Hasan
%Y Rahman, Md Tahmid Rahman
%S Proceedings of the Second Workshop on Bangla Language Processing (BLP-2025)
%D 2025
%8 December
%I Association for Computational Linguistics
%C Mumbai, India
%@ 979-8-89176-314-2
%F jawad-etal-2025-benchmarking
%X We present a novel Bangla Dialect Dataset comprising 600 annotated instances across four major dialects: Chattogram, Barishal, Sylhet, and Noakhali. The dataset was constructed from YouTube comments spanning diverse domains to capture authentic dialectal variations in informal online communication. Each instance includes the original dialectical text, its standard Bangla translation, and sentiment labels (Positive and Negative). We benchmark several state-of-the-art large language models on dialect-to-standard translation and sentiment analysis tasks using zero-shot and few-shot prompting strategies. Our experiments reveal that transliteration significantly improves translation quality for closed-source models, with GPT-4o-mini achieving the highest BLEU score of 0.343 in zero-shot with transliteration. For sentiment analysis, GPT-4o-mini demonstrates perfect precision, recall, and F1 scores (1.000) in few-shot settings. This dataset addresses the critical gap in resources for low-resource Bangla dialects and provides a foundation for developing dialect-aware NLP systems.
%U https://aclanthology.org/2025.banglalp-1.26/
%P 322-337
Markdown (Informal)
[Benchmarking Large Language Models on Bangla Dialect Translation and Dialectal Sentiment Analysis](https://aclanthology.org/2025.banglalp-1.26/) (Jawad et al., BanglaLP 2025)
ACL