@inproceedings{dolev-etal-2024-whisper,
title = "Does Whisper Understand {S}wiss {G}erman? An Automatic, Qualitative, and Human Evaluation",
author = {Dolev, Eyal and
Lutz, Clemens and
Aepli, No{\"e}mi},
editor = {Scherrer, Yves and
Jauhiainen, Tommi and
Ljube{\v{s}}i{\'c}, Nikola and
Zampieri, Marcos and
Nakov, Preslav and
Tiedemann, J{\"o}rg},
booktitle = "Proceedings of the Eleventh Workshop on NLP for Similar Languages, Varieties, and Dialects (VarDial 2024)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.vardial-1.3",
doi = "10.18653/v1/2024.vardial-1.3",
pages = "28--40",
abstract = {Whisper is a state-of-the-art automatic speech recognition (ASR) model (Radford et al., 2022). Although Swiss German dialects are allegedly not part of Whisper{'}s training data, preliminary experiments showed Whisper can transcribe Swiss German quite well, with the output being a speech translation into Standard German. To gain a better understanding of Whisper{'}s performance on Swiss German, we systematically evaluate it using automatic, qualitative, and human evaluation. We test its performance on three existing test sets: SwissDial (Dogan-Sch{\"o}nberger et al., 2021), STT4SG-350 (Pl{\"u}ss et al., 2023), and Swiss Parliaments Corpus (Pl{\"u}ss et al., 2021). In addition, we create a new test set for this study based on short mock clinical interviews. To automatically evaluate performance, we used word error rate (WER) and BLEU. We also conducted a qualitative analysis of Whisper{'}s performance, discussing its strengths and weaknesses. Finally, 28 people participated in a survey evaluating Whisper{'}s performance. All of our evaluations showed that Whisper is a viable ASR system for Swiss German, so long as the Standard German output is desired.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dolev-etal-2024-whisper">
<titleInfo>
<title>Does Whisper Understand Swiss German? An Automatic, Qualitative, and Human Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eyal</namePart>
<namePart type="family">Dolev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Clemens</namePart>
<namePart type="family">Lutz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Noëmi</namePart>
<namePart type="family">Aepli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eleventh Workshop on NLP for Similar Languages, Varieties, and Dialects (VarDial 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yves</namePart>
<namePart type="family">Scherrer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tommi</namePart>
<namePart type="family">Jauhiainen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="family">Ljubešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Whisper is a state-of-the-art automatic speech recognition (ASR) model (Radford et al., 2022). Although Swiss German dialects are allegedly not part of Whisper’s training data, preliminary experiments showed Whisper can transcribe Swiss German quite well, with the output being a speech translation into Standard German. To gain a better understanding of Whisper’s performance on Swiss German, we systematically evaluate it using automatic, qualitative, and human evaluation. We test its performance on three existing test sets: SwissDial (Dogan-Schönberger et al., 2021), STT4SG-350 (Plüss et al., 2023), and Swiss Parliaments Corpus (Plüss et al., 2021). In addition, we create a new test set for this study based on short mock clinical interviews. To automatically evaluate performance, we used word error rate (WER) and BLEU. We also conducted a qualitative analysis of Whisper’s performance, discussing its strengths and weaknesses. Finally, 28 people participated in a survey evaluating Whisper’s performance. All of our evaluations showed that Whisper is a viable ASR system for Swiss German, so long as the Standard German output is desired.</abstract>
<identifier type="citekey">dolev-etal-2024-whisper</identifier>
<identifier type="doi">10.18653/v1/2024.vardial-1.3</identifier>
<location>
<url>https://aclanthology.org/2024.vardial-1.3</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>28</start>
<end>40</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Does Whisper Understand Swiss German? An Automatic, Qualitative, and Human Evaluation
%A Dolev, Eyal
%A Lutz, Clemens
%A Aepli, Noëmi
%Y Scherrer, Yves
%Y Jauhiainen, Tommi
%Y Ljubešić, Nikola
%Y Zampieri, Marcos
%Y Nakov, Preslav
%Y Tiedemann, Jörg
%S Proceedings of the Eleventh Workshop on NLP for Similar Languages, Varieties, and Dialects (VarDial 2024)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F dolev-etal-2024-whisper
%X Whisper is a state-of-the-art automatic speech recognition (ASR) model (Radford et al., 2022). Although Swiss German dialects are allegedly not part of Whisper’s training data, preliminary experiments showed Whisper can transcribe Swiss German quite well, with the output being a speech translation into Standard German. To gain a better understanding of Whisper’s performance on Swiss German, we systematically evaluate it using automatic, qualitative, and human evaluation. We test its performance on three existing test sets: SwissDial (Dogan-Schönberger et al., 2021), STT4SG-350 (Plüss et al., 2023), and Swiss Parliaments Corpus (Plüss et al., 2021). In addition, we create a new test set for this study based on short mock clinical interviews. To automatically evaluate performance, we used word error rate (WER) and BLEU. We also conducted a qualitative analysis of Whisper’s performance, discussing its strengths and weaknesses. Finally, 28 people participated in a survey evaluating Whisper’s performance. All of our evaluations showed that Whisper is a viable ASR system for Swiss German, so long as the Standard German output is desired.
%R 10.18653/v1/2024.vardial-1.3
%U https://aclanthology.org/2024.vardial-1.3
%U https://doi.org/10.18653/v1/2024.vardial-1.3
%P 28-40
Markdown (Informal)
[Does Whisper Understand Swiss German? An Automatic, Qualitative, and Human Evaluation](https://aclanthology.org/2024.vardial-1.3) (Dolev et al., VarDial-WS 2024)
ACL