@inproceedings{ferreira-etal-2026-p3b3,
title = "{P}3{B}3: A Multi-Turn Conversational Benchmark for Measuring {E}uropean and {B}razilian {P}ortuguese Variety Bias in {LLM}s",
author = "Ferreira, Rafael and
Vieira, In{\^e}s and
Calvo, In{\^e}s and
Furtado, James and
Paulo, Iago and
Gl{\'o}ria-Silva, Diogo and
Tavares, Diogo and
Semedo, David and
Magalhaes, Joao",
editor = "Huang, Kaiyu and
Mo, Fengran and
Chen, Pinzhen and
Jiang, Meng",
booktitle = "Proceedings of the 1st Workshop on Multilinguality in the Era of Large Language Models ({M}e{LLM} 2026)",
month = jul,
year = "2026",
address = "San Diego, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.mellm-1.23/",
pages = "240--248",
ISBN = "979-8-89176-430-9",
abstract = "As Large Language Models (LLMs) become embedded in everyday communication, capturing regional linguistic variation is essential for reliable and equitable language use. In Portuguese, European (pt-PT) and Brazilian (pt-BR) varieties remain unevenly represented, with pt-BR dominating in data quantity, while LLM preference for Portuguese variants remains underexplored. To address this gap, we introduce P3B3, an expert-curated variety agnostic benchmark of conversational prompts, along with an evaluation framework for measuring variety bias and controllability. Experiments on several models show that most LLMs exhibit a strong bias toward pt-BR, with variation in controllability across models. These results highlight the need for more balanced multilingual representation across language varieties."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ferreira-etal-2026-p3b3">
<titleInfo>
<title>P3B3: A Multi-Turn Conversational Benchmark for Measuring European and Brazilian Portuguese Variety Bias in LLMs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rafael</namePart>
<namePart type="family">Ferreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Inês</namePart>
<namePart type="family">Vieira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Inês</namePart>
<namePart type="family">Calvo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Furtado</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iago</namePart>
<namePart type="family">Paulo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diogo</namePart>
<namePart type="family">Glória-Silva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diogo</namePart>
<namePart type="family">Tavares</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Semedo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joao</namePart>
<namePart type="family">Magalhaes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Multilinguality in the Era of Large Language Models (MeLLM 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kaiyu</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fengran</namePart>
<namePart type="family">Mo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pinzhen</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Meng</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-430-9</identifier>
</relatedItem>
<abstract>As Large Language Models (LLMs) become embedded in everyday communication, capturing regional linguistic variation is essential for reliable and equitable language use. In Portuguese, European (pt-PT) and Brazilian (pt-BR) varieties remain unevenly represented, with pt-BR dominating in data quantity, while LLM preference for Portuguese variants remains underexplored. To address this gap, we introduce P3B3, an expert-curated variety agnostic benchmark of conversational prompts, along with an evaluation framework for measuring variety bias and controllability. Experiments on several models show that most LLMs exhibit a strong bias toward pt-BR, with variation in controllability across models. These results highlight the need for more balanced multilingual representation across language varieties.</abstract>
<identifier type="citekey">ferreira-etal-2026-p3b3</identifier>
<location>
<url>https://aclanthology.org/2026.mellm-1.23/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>240</start>
<end>248</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T P3B3: A Multi-Turn Conversational Benchmark for Measuring European and Brazilian Portuguese Variety Bias in LLMs
%A Ferreira, Rafael
%A Vieira, Inês
%A Calvo, Inês
%A Furtado, James
%A Paulo, Iago
%A Glória-Silva, Diogo
%A Tavares, Diogo
%A Semedo, David
%A Magalhaes, Joao
%Y Huang, Kaiyu
%Y Mo, Fengran
%Y Chen, Pinzhen
%Y Jiang, Meng
%S Proceedings of the 1st Workshop on Multilinguality in the Era of Large Language Models (MeLLM 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, United States
%@ 979-8-89176-430-9
%F ferreira-etal-2026-p3b3
%X As Large Language Models (LLMs) become embedded in everyday communication, capturing regional linguistic variation is essential for reliable and equitable language use. In Portuguese, European (pt-PT) and Brazilian (pt-BR) varieties remain unevenly represented, with pt-BR dominating in data quantity, while LLM preference for Portuguese variants remains underexplored. To address this gap, we introduce P3B3, an expert-curated variety agnostic benchmark of conversational prompts, along with an evaluation framework for measuring variety bias and controllability. Experiments on several models show that most LLMs exhibit a strong bias toward pt-BR, with variation in controllability across models. These results highlight the need for more balanced multilingual representation across language varieties.
%U https://aclanthology.org/2026.mellm-1.23/
%P 240-248
Markdown (Informal)
[P3B3: A Multi-Turn Conversational Benchmark for Measuring European and Brazilian Portuguese Variety Bias in LLMs](https://aclanthology.org/2026.mellm-1.23/) (Ferreira et al., MeLLM 2026)
ACL
- Rafael Ferreira, Inês Vieira, Inês Calvo, James Furtado, Iago Paulo, Diogo Glória-Silva, Diogo Tavares, David Semedo, and Joao Magalhaes. 2026. P3B3: A Multi-Turn Conversational Benchmark for Measuring European and Brazilian Portuguese Variety Bias in LLMs. In Proceedings of the 1st Workshop on Multilinguality in the Era of Large Language Models (MeLLM 2026), pages 240–248, San Diego, United States. Association for Computational Linguistics.