@comment{Conference-paper record. The same record follows below in MODS XML and EndNote formats.}
@inproceedings{kuparinen-2026-effects,
    title     = {Effects of Speaker Bias in Dialect Identification and Automatic Transcription with Self-Supervised Speech Models},
    author    = {Kuparinen, Olli},
    booktitle = {Proceedings of the 13th Workshop on {NLP} for Similar Languages, Varieties and Dialects},
    month     = mar,
    year      = {2026},
    address   = {Rabat, Morocco},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.vardial-1.3/},
    pages     = {32--44},
    abstract  = {A major issue in audio modeling is speaker bias, in which the models learn language external traits, such as a speaker{'}s timbre or pitch, and use this information as a shortcut to a language task. This is especially problematic for dialectology, as it is typical in dialect corpora that only a few speakers represent a complete dialect area. In this paper, we explore the effects of speaker bias in two dialectal tasks: dialect identification and automatic dialectal transcription. We build two different data partitions of dialect interviews in Finnish and Norwegian: 1) a speaker dependent partition in which all of the speakers appear in training, development, and test sets, and 2) a speaker independent partition where each speaker only appears in exactly one set. We further experiment with modifications of the training data by augmenting the original audio with pitch shifts and noise, as well as changing the original speakers{'} voices with voice conversion models. We show that the dialect identification models are highly affected by speaker bias, whereas automatic dialectal transcription models are not. The audio modifications do not offer major performance gains for either of the languages or tasks.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for citekey kuparinen-2026-effects; the host relatedItem describes the proceedings volume. -->
  <mods ID="kuparinen-2026-effects">
    <titleInfo>
      <title>Effects of Speaker Bias in Dialect Identification and Automatic Transcription with Self-Supervised Speech Models</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Olli</namePart>
      <namePart type="family">Kuparinen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>A major issue in audio modeling is speaker bias, in which the models learn language external traits, such as a speaker’s timbre or pitch, and use this information as a shortcut to a language task. This is especially problematic for dialectology, as it is typical in dialect corpora that only a few speakers represent a complete dialect area. In this paper, we explore the effects of speaker bias in two dialectal tasks: dialect identification and automatic dialectal transcription. We build two different data partitions of dialect interviews in Finnish and Norwegian: 1) a speaker dependent partition in which all of the speakers appear in training, development, and test sets, and 2) a speaker independent partition where each speaker only appears in exactly one set. We further experiment with modifications of the training data by augmenting the original audio with pitch shifts and noise, as well as changing the original speakers’ voices with voice conversion models. We show that the dialect identification models are highly affected by speaker bias, whereas automatic dialectal transcription models are not. The audio modifications do not offer major performance gains for either of the languages or tasks.</abstract>
    <identifier type="citekey">kuparinen-2026-effects</identifier>
    <location>
      <url>https://aclanthology.org/2026.vardial-1.3/</url>
    </location>
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>32</start>
        <end>44</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Effects of Speaker Bias in Dialect Identification and Automatic Transcription with Self-Supervised Speech Models
%A Kuparinen, Olli
%S Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F kuparinen-2026-effects
%X A major issue in audio modeling is speaker bias, in which the models learn language external traits, such as a speaker’s timbre or pitch, and use this information as a shortcut to a language task. This is especially problematic for dialectology, as it is typical in dialect corpora that only a few speakers represent a complete dialect area. In this paper, we explore the effects of speaker bias in two dialectal tasks: dialect identification and automatic dialectal transcription. We build two different data partitions of dialect interviews in Finnish and Norwegian: 1) a speaker dependent partition in which all of the speakers appear in training, development, and test sets, and 2) a speaker independent partition where each speaker only appears in exactly one set. We further experiment with modifications of the training data by augmenting the original audio with pitch shifts and noise, as well as changing the original speakers’ voices with voice conversion models. We show that the dialect identification models are highly affected by speaker bias, whereas automatic dialectal transcription models are not. The audio modifications do not offer major performance gains for either of the languages or tasks.
%U https://aclanthology.org/2026.vardial-1.3/
%P 32-44
Markdown (Informal)
[Effects of Speaker Bias in Dialect Identification and Automatic Transcription with Self-Supervised Speech Models](https://aclanthology.org/2026.vardial-1.3/) (Kuparinen, VarDial 2026)
ACL