@inproceedings{saralegi-zulaika-2025-basqbbq,
    title = "{B}asq{BBQ}: A {QA} Benchmark for Assessing Social Biases in {LLM}s for {B}asque, a Low-Resource Language",
    author = "Saralegi, Xabier and
      Zulaika, Muitze",
    editor = "Rambow, Owen and
      Wanner, Leo and
      Apidianaki, Marianna and
      Al-Khalifa, Hend and
      Eugenio, Barbara Di and
      Schockaert, Steven",
    booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
    month = jan,
    year = "2025",
    address = "Abu Dhabi, UAE",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.coling-main.318/",
    pages = "4753--4767",
    abstract = "The rise of pre-trained language models has revolutionized natural language processing (NLP) tasks, but concerns about the propagation of social biases in these models remain, particularly in under-resourced languages like Basque. This paper introduces BasqBBQ, the first benchmark designed to assess social biases in Basque across eight domains, using a multiple-choice question-answering (QA) task. We evaluate various autoregressive large language models (LLMs), including multilingual and those adapted for Basque, to analyze both their accuracy and bias transmission. Our results show that while larger models generally achieve better accuracy, ambiguous cases remain challenging. In terms of bias, larger models exhibit lower negative bias. However, high negative bias persists in specific categories such as Disability Status, Age and Physical Appearance, especially in ambiguous contexts. Conversely, categories such as Sexual Orientation, Gender Identity, and Race/Ethnicity show the least bias in ambiguous contexts. The continual pre-training based adaptation process for Basque has a limited impact on bias when compared with English. This work represents a key step toward creating more ethical LLMs for low-resource languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="saralegi-zulaika-2025-basqbbq">
    <titleInfo>
      <title>BasqBBQ: A QA Benchmark for Assessing Social Biases in LLMs for Basque, a Low-Resource Language</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Xabier</namePart>
      <namePart type="family">Saralegi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Muitze</namePart>
      <namePart type="family">Zulaika</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-01</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 31st International Conference on Computational Linguistics</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Owen</namePart>
        <namePart type="family">Rambow</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Leo</namePart>
        <namePart type="family">Wanner</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marianna</namePart>
        <namePart type="family">Apidianaki</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hend</namePart>
        <namePart type="family">Al-Khalifa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Barbara</namePart>
        <namePart type="given">Di</namePart>
        <namePart type="family">Eugenio</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Steven</namePart>
        <namePart type="family">Schockaert</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Abu Dhabi, UAE</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The rise of pre-trained language models has revolutionized natural language processing (NLP) tasks, but concerns about the propagation of social biases in these models remain, particularly in under-resourced languages like Basque. This paper introduces BasqBBQ, the first benchmark designed to assess social biases in Basque across eight domains, using a multiple-choice question-answering (QA) task. We evaluate various autoregressive large language models (LLMs), including multilingual and those adapted for Basque, to analyze both their accuracy and bias transmission. Our results show that while larger models generally achieve better accuracy, ambiguous cases remain challenging. In terms of bias, larger models exhibit lower negative bias. However, high negative bias persists in specific categories such as Disability Status, Age and Physical Appearance, especially in ambiguous contexts. Conversely, categories such as Sexual Orientation, Gender Identity, and Race/Ethnicity show the least bias in ambiguous contexts. The continual pre-training based adaptation process for Basque has a limited impact on bias when compared with English. This work represents a key step toward creating more ethical LLMs for low-resource languages.</abstract>
    <identifier type="citekey">saralegi-zulaika-2025-basqbbq</identifier>
    <location>
      <url>https://aclanthology.org/2025.coling-main.318/</url>
    </location>
    <part>
      <date>2025-01</date>
      <extent unit="page">
        <start>4753</start>
        <end>4767</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T BasqBBQ: A QA Benchmark for Assessing Social Biases in LLMs for Basque, a Low-Resource Language
%A Saralegi, Xabier
%A Zulaika, Muitze
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Eugenio, Barbara Di
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F saralegi-zulaika-2025-basqbbq
%X The rise of pre-trained language models has revolutionized natural language processing (NLP) tasks, but concerns about the propagation of social biases in these models remain, particularly in under-resourced languages like Basque. This paper introduces BasqBBQ, the first benchmark designed to assess social biases in Basque across eight domains, using a multiple-choice question-answering (QA) task. We evaluate various autoregressive large language models (LLMs), including multilingual and those adapted for Basque, to analyze both their accuracy and bias transmission. Our results show that while larger models generally achieve better accuracy, ambiguous cases remain challenging. In terms of bias, larger models exhibit lower negative bias. However, high negative bias persists in specific categories such as Disability Status, Age and Physical Appearance, especially in ambiguous contexts. Conversely, categories such as Sexual Orientation, Gender Identity, and Race/Ethnicity show the least bias in ambiguous contexts. The continual pre-training based adaptation process for Basque has a limited impact on bias when compared with English. This work represents a key step toward creating more ethical LLMs for low-resource languages.
%U https://aclanthology.org/2025.coling-main.318/
%P 4753-4767
Markdown (Informal)
[BasqBBQ: A QA Benchmark for Assessing Social Biases in LLMs for Basque, a Low-Resource Language](https://aclanthology.org/2025.coling-main.318/) (Saralegi & Zulaika, COLING 2025)
ACL
Xabier Saralegi and Muitze Zulaika. 2025. BasqBBQ: A QA Benchmark for Assessing Social Biases in LLMs for Basque, a Low-Resource Language. In Proceedings of the 31st International Conference on Computational Linguistics, pages 4753–4767, Abu Dhabi, UAE. Association for Computational Linguistics.