% BibTeX record for the paper published at https://aclanthology.org/2026.vardial-1.17/
% Inner braces in title/booktitle protect acronyms and proper nouns from
% style-driven recasing; whole words are braced rather than single letters.
@inproceedings{tsoukala-etal-2026-extending,
  title     = {Extending {ASR} Evaluation Resources for {Modern} {Greek} Dialects},
  author    = {Tsoukala, Chara and
               Bompolas, Stavros and
               Margariti, Antigoni and
               Panagiotou, Konstantina and
               Plaiti, Maria Elisavet and
               Tzanakaki, Nefeli and
               Karatsareas, Petros and
               Ralli, Angela and
               Anastasopoulos, Antonios and
               Markantonatou, Stella},
  booktitle = {Proceedings of the 13th Workshop on {NLP} for Similar Languages, Varieties and Dialects},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.vardial-1.17/},
  pages     = {210--222},
  abstract  = {Recent progress in Automatic Speech Recognition (ASR) has primarily benefited high-resource standard languages, while dialectal speech remains challenging and underexplored. We present an expanded benchmark for low-resource Modern Greek dialects, covering Aperathiot, Cretan, Lesbian, and Cappadocian, spanning southern, northern, and contact-influenced varieties with varying degrees of divergence from Standard Modern Greek. The benchmark provides dialectal transcriptions in the Greek alphabet, following SMG-based orthographic conventions, while preserving dialectal lexical and morphophonological forms. Using this benchmark, we evaluate state-of-the-art multilingual ASR models in a zero-shot setting and by further fine-tuning per dialect. Zero-shot results reveal a clear performance gradient with dialectal distance from Standard Modern Greek, with best WERs ranging from about 60-70{\%} for southern dialects to over 80{\%} for Lesbian and nearly 97{\%} for Cappadocian. Fine-tuning substantially reduces error rates (up to 47{\%} relative WER improvement), with Cappadocian remaining the most challenging variety (best WER 68.17{\%}). Overall, our results highlight persistent limitations of current pretrained ASR models under dialectal variation and the need for dedicated benchmarks and adaptation strategies.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier below. -->
  <mods ID="tsoukala-etal-2026-extending">
    <titleInfo>
      <title>Extending ASR Evaluation Resources for Modern Greek Dialects</title>
    </titleInfo>

    <!-- Authors: one personal <name> element each, all with the "author" role. -->
    <name type="personal">
      <namePart type="given">Chara</namePart>
      <namePart type="family">Tsoukala</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Stavros</namePart>
      <namePart type="family">Bompolas</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Antigoni</namePart>
      <namePart type="family">Margariti</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Konstantina</namePart>
      <namePart type="family">Panagiotou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <!-- Two given-name parts for this author. -->
      <namePart type="given">Maria</namePart>
      <namePart type="given">Elisavet</namePart>
      <namePart type="family">Plaiti</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nefeli</namePart>
      <namePart type="family">Tzanakaki</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Petros</namePart>
      <namePart type="family">Karatsareas</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Angela</namePart>
      <namePart type="family">Ralli</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Antonios</namePart>
      <namePart type="family">Anastasopoulos</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Stella</namePart>
      <namePart type="family">Markantonatou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <!-- Issue date of the paper (year-month). -->
    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume containing this paper. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>Recent progress in Automatic Speech Recognition (ASR) has primarily benefited high-resource standard languages, while dialectal speech remains challenging and underexplored. We present an expanded benchmark for low-resource Modern Greek dialects, covering Aperathiot, Cretan, Lesbian, and Cappadocian, spanning southern, northern, and contact-influenced varieties with varying degrees of divergence from Standard Modern Greek. The benchmark provides dialectal transcriptions in the Greek alphabet, following SMG-based orthographic conventions, while preserving dialectal lexical and morphophonological forms. Using this benchmark, we evaluate state-of-the-art multilingual ASR models in a zero-shot setting and by further fine-tuning per dialect. Zero-shot results reveal a clear performance gradient with dialectal distance from Standard Modern Greek, with best WERs ranging from about 60-70% for southern dialects to over 80% for Lesbian and nearly 97% for Cappadocian. Fine-tuning substantially reduces error rates (up to 47% relative WER improvement), with Cappadocian remaining the most challenging variety (best WER 68.17%). Overall, our results highlight persistent limitations of current pretrained ASR models under dialectal variation and the need for dedicated benchmarks and adaptation strategies.</abstract>

    <!-- Identifiers and landing-page URL. -->
    <identifier type="citekey">tsoukala-etal-2026-extending</identifier>
    <location>
      <url>https://aclanthology.org/2026.vardial-1.17/</url>
    </location>

    <!-- Page extent of the paper. -->
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>210</start>
        <end>222</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Extending ASR Evaluation Resources for Modern Greek Dialects
%A Tsoukala, Chara
%A Bompolas, Stavros
%A Margariti, Antigoni
%A Panagiotou, Konstantina
%A Plaiti, Maria Elisavet
%A Tzanakaki, Nefeli
%A Karatsareas, Petros
%A Ralli, Angela
%A Anastasopoulos, Antonios
%A Markantonatou, Stella
%S Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F tsoukala-etal-2026-extending
%X Recent progress in Automatic Speech Recognition (ASR) has primarily benefited high-resource standard languages, while dialectal speech remains challenging and underexplored. We present an expanded benchmark for low-resource Modern Greek dialects, covering Aperathiot, Cretan, Lesbian, and Cappadocian, spanning southern, northern, and contact-influenced varieties with varying degrees of divergence from Standard Modern Greek. The benchmark provides dialectal transcriptions in the Greek alphabet, following SMG-based orthographic conventions, while preserving dialectal lexical and morphophonological forms. Using this benchmark, we evaluate state-of-the-art multilingual ASR models in a zero-shot setting and by further fine-tuning per dialect. Zero-shot results reveal a clear performance gradient with dialectal distance from Standard Modern Greek, with best WERs ranging from about 60-70% for southern dialects to over 80% for Lesbian and nearly 97% for Cappadocian. Fine-tuning substantially reduces error rates (up to 47% relative WER improvement), with Cappadocian remaining the most challenging variety (best WER 68.17%). Overall, our results highlight persistent limitations of current pretrained ASR models under dialectal variation and the need for dedicated benchmarks and adaptation strategies.
%U https://aclanthology.org/2026.vardial-1.17/
%P 210-222
Markdown (Informal)
[Extending ASR Evaluation Resources for Modern Greek Dialects](https://aclanthology.org/2026.vardial-1.17/) (Tsoukala et al., VarDial 2026)
ACL
- Chara Tsoukala, Stavros Bompolas, Antigoni Margariti, Konstantina Panagiotou, Maria Elisavet Plaiti, Nefeli Tzanakaki, Petros Karatsareas, Angela Ralli, Antonios Anastasopoulos, and Stella Markantonatou. 2026. Extending ASR Evaluation Resources for Modern Greek Dialects. In Proceedings of the 13th Workshop on NLP for Similar Languages, Varieties and Dialects, pages 210–222, Rabat, Morocco. Association for Computational Linguistics.