@inproceedings{gamboa-etal-2025-bias,
title = "Bias Attribution in {F}ilipino Language Models: Extending a Bias Interpretability Metric for Application on Agglutinative Languages",
author = "Gamboa, Lance Calvin Lim and
Feng, Yue and
Lee, Mark G.",
editor = "Fale{\'n}ska, Agnieszka and
Basta, Christine and
Costa-juss{\`a}, Marta and
Sta{\'n}czak, Karolina and
Nozza, Debora",
booktitle = "Proceedings of the 6th Workshop on Gender Bias in Natural Language Processing (GeBNLP)",
month = aug,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.gebnlp-1.19/",
doi = "10.18653/v1/2025.gebnlp-1.19",
pages = "195--205",
ISBN = "979-8-89176-277-0",
abstract = "Emerging research on bias attribution and interpretability have revealed how tokens contribute to biased behavior in language models processing English texts. We build on this line of inquiry by adapting the information-theoretic bias attribution score metric for implementation on models handling agglutinative languages{---}particularly Filipino. We then demonstrate the effectiveness of our adapted method by using it on a purely Filipino model and on three multilingual models{---}one trained on languages worldwide and two on Southeast Asian data. Our results show that Filipino models are driven towards bias by words pertaining to $\textit{people}$, $\textit{objects}$, and $\textit{relationships}${---}entity-based themes that stand in contrast to the action-heavy nature of bias-contributing themes in English (i.e., $\textit{criminal}$, $\textit{sexual}$, and $\textit{prosocial}$ behaviors). These findings point to differences in how English and non-English models process inputs linked to sociodemographic groups and bias."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gamboa-etal-2025-bias">
<titleInfo>
<title>Bias Attribution in Filipino Language Models: Extending a Bias Interpretability Metric for Application on Agglutinative Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lance</namePart>
<namePart type="given">Calvin</namePart>
<namePart type="given">Lim</namePart>
<namePart type="family">Gamboa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Feng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="given">G</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th Workshop on Gender Bias in Natural Language Processing (GeBNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Agnieszka</namePart>
<namePart type="family">Faleńska</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christine</namePart>
<namePart type="family">Basta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marta</namePart>
<namePart type="family">Costa-jussà</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Karolina</namePart>
<namePart type="family">Stańczak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debora</namePart>
<namePart type="family">Nozza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-277-0</identifier>
</relatedItem>
<abstract>Emerging research on bias attribution and interpretability have revealed how tokens contribute to biased behavior in language models processing English texts. We build on this line of inquiry by adapting the information-theoretic bias attribution score metric for implementation on models handling agglutinative languages—particularly Filipino. We then demonstrate the effectiveness of our adapted method by using it on a purely Filipino model and on three multilingual models—one trained on languages worldwide and two on Southeast Asian data. Our results show that Filipino models are driven towards bias by words pertaining to people, objects, and relationships—entity-based themes that stand in contrast to the action-heavy nature of bias-contributing themes in English (i.e., criminal, sexual, and prosocial behaviors). These findings point to differences in how English and non-English models process inputs linked to sociodemographic groups and bias.</abstract>
<identifier type="citekey">gamboa-etal-2025-bias</identifier>
<identifier type="doi">10.18653/v1/2025.gebnlp-1.19</identifier>
<location>
<url>https://aclanthology.org/2025.gebnlp-1.19/</url>
</location>
<part>
<date>2025-08</date>
<extent unit="page">
<start>195</start>
<end>205</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Bias Attribution in Filipino Language Models: Extending a Bias Interpretability Metric for Application on Agglutinative Languages
%A Gamboa, Lance Calvin Lim
%A Feng, Yue
%A Lee, Mark G.
%Y Faleńska, Agnieszka
%Y Basta, Christine
%Y Costa-jussà, Marta
%Y Stańczak, Karolina
%Y Nozza, Debora
%S Proceedings of the 6th Workshop on Gender Bias in Natural Language Processing (GeBNLP)
%D 2025
%8 August
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-277-0
%F gamboa-etal-2025-bias
%X Emerging research on bias attribution and interpretability has revealed how tokens contribute to biased behavior in language models processing English texts. We build on this line of inquiry by adapting the information-theoretic bias attribution score metric for implementation on models handling agglutinative languages—particularly Filipino. We then demonstrate the effectiveness of our adapted method by using it on a purely Filipino model and on three multilingual models—one trained on languages worldwide and two on Southeast Asian data. Our results show that Filipino models are driven towards bias by words pertaining to people, objects, and relationships—entity-based themes that stand in contrast to the action-heavy nature of bias-contributing themes in English (i.e., criminal, sexual, and prosocial behaviors). These findings point to differences in how English and non-English models process inputs linked to sociodemographic groups and bias.
%R 10.18653/v1/2025.gebnlp-1.19
%U https://aclanthology.org/2025.gebnlp-1.19/
%U https://doi.org/10.18653/v1/2025.gebnlp-1.19
%P 195-205
Markdown (Informal)
[Bias Attribution in Filipino Language Models: Extending a Bias Interpretability Metric for Application on Agglutinative Languages](https://aclanthology.org/2025.gebnlp-1.19/) (Gamboa et al., GeBNLP 2025)
ACL
Lance Calvin Lim Gamboa, Yue Feng, and Mark G. Lee. 2025. Bias Attribution in Filipino Language Models: Extending a Bias Interpretability Metric for Application on Agglutinative Languages. In Proceedings of the 6th Workshop on Gender Bias in Natural Language Processing (GeBNLP), pages 195–205, Vienna, Austria. Association for Computational Linguistics.