@inproceedings{arnob-etal-2025-assessing,
title = "Assessing Gender Bias of Pretrained {B}angla Language Models in {STEM} and {SHAPE} Fields",
author = "Arnob, Noor Mairukh Khan and
Mahmud, Saiyara and
Wasi, Azmine Toushik",
editor = "Fale{\'n}ska, Agnieszka and
Basta, Christine and
Costa-juss{\`a}, Marta and
Sta{\'n}czak, Karolina and
Nozza, Debora",
booktitle = "Proceedings of the 6th Workshop on Gender Bias in Natural Language Processing (GeBNLP)",
month = aug,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.gebnlp-1.24/",
doi = "10.18653/v1/2025.gebnlp-1.24",
pages = "268--281",
ISBN = "979-8-89176-277-0",
abstract = "Gender bias continues to shape societal perceptions across both STEM (Science, Technology, Engineering, and Mathematics) and SHAPE (Social Sciences, Humanities, and the Arts for People and the Economy) domains. While existing studies have explored such biases in English language models, similar analyses in Bangla{---}spoken by over 240 million people{---}remain scarce. In this work, we investigate gender-profession associations in Bangla language models. We introduce \textit{Pokkhopat}, a curated dataset of gendered terms and profession-related words across STEM and SHAPE disciplines. Using a suite of embedding-based bias detection methods{---}including WEAT, ECT, RND, RIPA, and cosine similarity visualizations{---}we evaluate 11 Bangla language models. Our findings show that several widely-used open-source Bangla NLP models (e.g., sagorsarker/bangla-bert-base) exhibit significant gender bias, underscoring the need for more inclusive and bias-aware development in low-resource languages like Bangla. We also find that many STEM and SHAPE-related words are absent from these models' vocabularies, complicating bias detection and possibly amplifying existing biases. This emphasizes the importance of incorporating more diverse and comprehensive training data to mitigate such biases moving forward. Code available at \url{https://github.com/HerWILL-Inc/ACL-2025/}."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="arnob-etal-2025-assessing">
    <titleInfo>
      <title>Assessing Gender Bias of Pretrained Bangla Language Models in STEM and SHAPE Fields</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Noor</namePart>
      <namePart type="given">Mairukh</namePart>
      <namePart type="given">Khan</namePart>
      <namePart type="family">Arnob</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Saiyara</namePart>
      <namePart type="family">Mahmud</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Azmine</namePart>
      <namePart type="given">Toushik</namePart>
      <namePart type="family">Wasi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 6th Workshop on Gender Bias in Natural Language Processing (GeBNLP)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Agnieszka</namePart>
        <namePart type="family">Faleńska</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Christine</namePart>
        <namePart type="family">Basta</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marta</namePart>
        <namePart type="family">Costa-jussà</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Karolina</namePart>
        <namePart type="family">Stańczak</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Debora</namePart>
        <namePart type="family">Nozza</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-277-0</identifier>
    </relatedItem>
    <abstract>Gender bias continues to shape societal perceptions across both STEM (Science, Technology, Engineering, and Mathematics) and SHAPE (Social Sciences, Humanities, and the Arts for People and the Economy) domains. While existing studies have explored such biases in English language models, similar analyses in Bangla—spoken by over 240 million people—remain scarce. In this work, we investigate gender-profession associations in Bangla language models. We introduce Pokkhopat, a curated dataset of gendered terms and profession-related words across STEM and SHAPE disciplines. Using a suite of embedding-based bias detection methods—including WEAT, ECT, RND, RIPA, and cosine similarity visualizations—we evaluate 11 Bangla language models. Our findings show that several widely-used open-source Bangla NLP models (e.g., sagorsarker/bangla-bert-base) exhibit significant gender bias, underscoring the need for more inclusive and bias-aware development in low-resource languages like Bangla. We also find that many STEM and SHAPE-related words are absent from these models’ vocabularies, complicating bias detection and possibly amplifying existing biases. This emphasizes the importance of incorporating more diverse and comprehensive training data to mitigate such biases moving forward. Code available at https://github.com/HerWILL-Inc/ACL-2025/.</abstract>
    <identifier type="citekey">arnob-etal-2025-assessing</identifier>
    <identifier type="doi">10.18653/v1/2025.gebnlp-1.24</identifier>
    <location>
      <url>https://aclanthology.org/2025.gebnlp-1.24/</url>
    </location>
    <part>
      <date>2025-08</date>
      <extent unit="page">
        <start>268</start>
        <end>281</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Assessing Gender Bias of Pretrained Bangla Language Models in STEM and SHAPE Fields
%A Arnob, Noor Mairukh Khan
%A Mahmud, Saiyara
%A Wasi, Azmine Toushik
%Y Faleńska, Agnieszka
%Y Basta, Christine
%Y Costa-jussà, Marta
%Y Stańczak, Karolina
%Y Nozza, Debora
%S Proceedings of the 6th Workshop on Gender Bias in Natural Language Processing (GeBNLP)
%D 2025
%8 August
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-277-0
%F arnob-etal-2025-assessing
%X Gender bias continues to shape societal perceptions across both STEM (Science, Technology, Engineering, and Mathematics) and SHAPE (Social Sciences, Humanities, and the Arts for People and the Economy) domains. While existing studies have explored such biases in English language models, similar analyses in Bangla—spoken by over 240 million people—remain scarce. In this work, we investigate gender-profession associations in Bangla language models. We introduce Pokkhopat, a curated dataset of gendered terms and profession-related words across STEM and SHAPE disciplines. Using a suite of embedding-based bias detection methods—including WEAT, ECT, RND, RIPA, and cosine similarity visualizations—we evaluate 11 Bangla language models. Our findings show that several widely-used open-source Bangla NLP models (e.g., sagorsarker/bangla-bert-base) exhibit significant gender bias, underscoring the need for more inclusive and bias-aware development in low-resource languages like Bangla. We also find that many STEM and SHAPE-related words are absent from these models’ vocabularies, complicating bias detection and possibly amplifying existing biases. This emphasizes the importance of incorporating more diverse and comprehensive training data to mitigate such biases moving forward. Code available at https://github.com/HerWILL-Inc/ACL-2025/.
%R 10.18653/v1/2025.gebnlp-1.24
%U https://aclanthology.org/2025.gebnlp-1.24/
%U https://doi.org/10.18653/v1/2025.gebnlp-1.24
%P 268-281
Markdown (Informal)
[Assessing Gender Bias of Pretrained Bangla Language Models in STEM and SHAPE Fields](https://aclanthology.org/2025.gebnlp-1.24/) (Arnob et al., GeBNLP 2025)

ACL
Noor Mairukh Khan Arnob, Saiyara Mahmud, and Azmine Toushik Wasi. 2025. [Assessing Gender Bias of Pretrained Bangla Language Models in STEM and SHAPE Fields](https://aclanthology.org/2025.gebnlp-1.24/). In Proceedings of the 6th Workshop on Gender Bias in Natural Language Processing (GeBNLP), pages 268–281, Vienna, Austria. Association for Computational Linguistics.
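The abstract names WEAT among the embedding-based bias tests applied to the 11 Bangla models. As a rough illustration of what such a test computes, below is a minimal NumPy sketch of the WEAT effect size (Caliskan et al., 2017). The random vectors, set sizes, and function names are illustrative assumptions, not the paper's Pokkhopat word lists or its released evaluation code; see the repository linked above for the authors' implementation.

```python
# Minimal sketch of the WEAT effect size (Caliskan et al., 2017), one of the
# embedding-based bias tests the paper applies to Bangla language models.
# The inputs below are illustrative placeholders, not the Pokkhopat dataset.
import numpy as np

def cosine(u: np.ndarray, v: np.ndarray) -> float:
    # Cosine similarity between two embedding vectors.
    return float(u @ v / (np.linalg.norm(u) * np.linalg.norm(v)))

def association(w, A, B):
    # s(w, A, B): mean similarity of word w to attribute set A minus set B.
    return np.mean([cosine(w, a) for a in A]) - np.mean([cosine(w, b) for b in B])

def weat_effect_size(X, Y, A, B):
    # X, Y: target embeddings (e.g., STEM vs. SHAPE profession words).
    # A, B: attribute embeddings (e.g., male vs. female gendered terms).
    sx = [association(x, A, B) for x in X]
    sy = [association(y, A, B) for y in Y]
    # Cohen's-d-style effect size over the pooled association scores.
    return (np.mean(sx) - np.mean(sy)) / np.std(sx + sy)

# Toy usage: random vectors stand in for model embeddings, so the effect
# size should land near 0 (no systematic gender-profession association).
rng = np.random.default_rng(0)
X, Y, A, B = (rng.normal(size=(5, 300)) for _ in range(4))
print(weat_effect_size(X, Y, A, B))
```

In practice the targets and attributes would be looked up in a pretrained model's embedding table; the abstract notes that many STEM- and SHAPE-related Bangla words are missing from those vocabularies, which is precisely what complicates this lookup step.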