@inproceedings{bonilla-2026-beyond,
title = "Beyond Acoustics: Isolating Dialectal and Sociolinguistic Bias in {S}panish {ASR}",
author = "Bonilla, Johnatan E.",
editor = "Card, Dallas and
Field, Anjalie and
Keith, Katherine and
Mendelsohn, Julia",
booktitle = "Proceedings of the Seventh Workshop on Natural Language Processing and Computational Social Science",
month = jul,
year = "2026",
address = "San Diego",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.nlpcss-1.8/",
pages = "123--132",
ISBN = "979-8-89176-426-2",
abstract = "Large-scale ASR systems such as Whisper achieve competitive aggregate Word Error Rate (WER) on multilingual benchmarks, but this aggregate conceals systematic disparities across speaker populations. We evaluate Whisper large-v3 on 276 recordings from the \textit{Corpus Oral y Sonoro del Espa{\~n}ol Rural} (COSER), a dialectological archive of elderly rural speakers across all Spanish provinces. WER is computed separately for Informants and Interviewers within each recording, revealing that mixed-role evaluation underestimates Informant WER in the majority of provinces, with the largest corrections in southern areas. Negative Binomial regression with cluster-robust standard errors shows that Andalusia and Extremadura generate significantly more Informant errors than the Castilian heartland (Andalusia IRR = 1.20, $p < 0.001$; Extremadura IRR = 1.24, $p = 0.020$), while no geographic predictor reaches significance for Interviewers sharing the same recording environment. Male Informants generate 12.5{\%} more errors than females after geographic adjustment ($p < 0.001$), consistent with differential vernacular retention in traditional rural communities. The geographic pattern aligns with established dialectological classifications of Peninsular Spanish. These results demonstrate that role-disaggregated evaluation is a necessary methodological prerequisite for fairness audits of ASR systems applied to sociolinguistically diverse corpora: aggregate benchmarks systematically suppress disparities that are borne disproportionately by the most underrepresented speaker populations, and their use in isolation constitutes both an allocative harm and a measurement failure."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bonilla-2026-beyond">
<titleInfo>
<title>Beyond Acoustics: Isolating Dialectal and Sociolinguistic Bias in Spanish ASR</title>
</titleInfo>
<name type="personal">
<namePart type="given">Johnatan</namePart>
<namePart type="given">E</namePart>
<namePart type="family">Bonilla</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh Workshop on Natural Language Processing and Computational Social Science</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dallas</namePart>
<namePart type="family">Card</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anjalie</namePart>
<namePart type="family">Field</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katherine</namePart>
<namePart type="family">Keith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Mendelsohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-426-2</identifier>
</relatedItem>
<abstract>Large-scale ASR systems such as Whisper achieve competitive aggregate Word Error Rate (WER) on multilingual benchmarks, but this aggregate conceals systematic disparities across speaker populations. We evaluate Whisper large-v3 on 276 recordings from the Corpus Oral y Sonoro del Español Rural (COSER), a dialectological archive of elderly rural speakers across all Spanish provinces. WER is computed separately for Informants and Interviewers within each recording, revealing that mixed-role evaluation underestimates Informant WER in the majority of provinces, with the largest corrections in southern areas. Negative Binomial regression with cluster-robust standard errors shows that Andalusia and Extremadura generate significantly more Informant errors than the Castilian heartland (Andalusia IRR = 1.20, p &lt; 0.001; Extremadura IRR = 1.24, p = 0.020), while no geographic predictor reaches significance for Interviewers sharing the same recording environment. Male Informants generate 12.5% more errors than females after geographic adjustment (p &lt; 0.001), consistent with differential vernacular retention in traditional rural communities. The geographic pattern aligns with established dialectological classifications of Peninsular Spanish. These results demonstrate that role-disaggregated evaluation is a necessary methodological prerequisite for fairness audits of ASR systems applied to sociolinguistically diverse corpora: aggregate benchmarks systematically suppress disparities that are borne disproportionately by the most underrepresented speaker populations, and their use in isolation constitutes both an allocative harm and a measurement failure.</abstract>
<identifier type="citekey">bonilla-2026-beyond</identifier>
<location>
<url>https://aclanthology.org/2026.nlpcss-1.8/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>123</start>
<end>132</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Beyond Acoustics: Isolating Dialectal and Sociolinguistic Bias in Spanish ASR
%A Bonilla, Johnatan E.
%Y Card, Dallas
%Y Field, Anjalie
%Y Keith, Katherine
%Y Mendelsohn, Julia
%S Proceedings of the Seventh Workshop on Natural Language Processing and Computational Social Science
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego
%@ 979-8-89176-426-2
%F bonilla-2026-beyond
%X Large-scale ASR systems such as Whisper achieve competitive aggregate Word Error Rate (WER) on multilingual benchmarks, but this aggregate conceals systematic disparities across speaker populations. We evaluate Whisper large-v3 on 276 recordings from the Corpus Oral y Sonoro del Español Rural (COSER), a dialectological archive of elderly rural speakers across all Spanish provinces. WER is computed separately for Informants and Interviewers within each recording, revealing that mixed-role evaluation underestimates Informant WER in the majority of provinces, with the largest corrections in southern areas. Negative Binomial regression with cluster-robust standard errors shows that Andalusia and Extremadura generate significantly more Informant errors than the Castilian heartland (Andalusia IRR = 1.20, p < 0.001; Extremadura IRR = 1.24, p = 0.020), while no geographic predictor reaches significance for Interviewers sharing the same recording environment. Male Informants generate 12.5% more errors than females after geographic adjustment (p < 0.001), consistent with differential vernacular retention in traditional rural communities. The geographic pattern aligns with established dialectological classifications of Peninsular Spanish. These results demonstrate that role-disaggregated evaluation is a necessary methodological prerequisite for fairness audits of ASR systems applied to sociolinguistically diverse corpora: aggregate benchmarks systematically suppress disparities that are borne disproportionately by the most underrepresented speaker populations, and their use in isolation constitutes both an allocative harm and a measurement failure.
%U https://aclanthology.org/2026.nlpcss-1.8/
%P 123-132
Markdown (Informal)
[Beyond Acoustics: Isolating Dialectal and Sociolinguistic Bias in Spanish ASR](https://aclanthology.org/2026.nlpcss-1.8/) (Bonilla, NLP+CSS 2026)
ACL