@inproceedings{samardzic-etal-2026-regional,
title = "Regional Variation in the Performance of {ASR} Models on {C}roatian and {S}erbian",
author = "Samard{\v{z}}i{\'c}, Tanja and
Rupnik, Peter and
Ljube{\v{s}}i{\'c}, Nikola",
booktitle = "Proceedings of the 13th Workshop on {NLP} for Similar Languages, Varieties and Dialects",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.vardial-1.20/",
pages = "242--249",
abstract = "Regional variation was a limiting factor for automatic speech recognition (ASR) before large language models. With the new technology, speech processing becomes more general, which opens the question of how to use data in similar languages such as Croatian and Serbian. In this paper, we analyse model performance in the currently available train-test scenarios with the goal of better understanding the mutual interference of these two languages. Our findings suggest that better performing models are not very sensitive to the regional variation. Training from scratch in one of the languages can give good results on both of them, while fine-tuning large pre-trained multilingual models on smaller data sets does not give the expected results."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="samardzic-etal-2026-regional">
<titleInfo>
<title>Regional Variation in the Performance of ASR Models on Croatian and Serbian</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tanja</namePart>
<namePart type="family">Samardžić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Rupnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="family">Ljubešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Regional variation was a limiting factor for automatic speech recognition (ASR) before large language models. With the new technology, speech processing becomes more general, which opens the question of how to use data in similar languages such as Croatian and Serbian. In this paper, we analyse model performance in the currently available train-test scenarios with the goal of better understanding the mutual interference of these two languages. Our findings suggest that better performing models are not very sensitive to the regional variation. Training from scratch in one of the languages can give good results on both of them, while fine-tuning large pre-trained multilingual models on smaller data sets does not give the expected results.</abstract>
<identifier type="citekey">samardzic-etal-2026-regional</identifier>
<location>
<url>https://aclanthology.org/2026.vardial-1.20/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>242</start>
<end>249</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Regional Variation in the Performance of ASR Models on Croatian and Serbian
%A Samardžić, Tanja
%A Rupnik, Peter
%A Ljubešić, Nikola
%S Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F samardzic-etal-2026-regional
%X Regional variation was a limiting factor for automatic speech recognition (ASR) before large language models. With the new technology, speech processing becomes more general, which opens the question of how to use data in similar languages such as Croatian and Serbian. In this paper, we analyse model performance in the currently available train-test scenarios with the goal of better understanding the mutual interference of these two languages. Our findings suggest that better performing models are not very sensitive to the regional variation. Training from scratch in one of the languages can give good results on both of them, while fine-tuning large pre-trained multilingual models on smaller data sets does not give the expected results.
%U https://aclanthology.org/2026.vardial-1.20/
%P 242-249
Markdown (Informal)
[Regional Variation in the Performance of ASR Models on Croatian and Serbian](https://aclanthology.org/2026.vardial-1.20/) (Samardžić et al., VarDial 2026)
ACL