@inproceedings{gamboni-2025-fine,
title = "Fine-Tuning Whisper for {K}ildin {S}ami",
author = "Gamboni, Enzo",
editor = {H{\"a}m{\"a}l{\"a}inen, Mika and
Rie{\ss}ler, Michael and
Morooka, Eiaki V. and
Kharlashkin, Lev},
booktitle = "Proceedings of the 10th International Workshop on Computational Linguistics for Uralic Languages",
month = dec,
year = "2025",
address = "Joensuu, Finland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.iwclul-1.13/",
pages = "106--111",
ISBN = "979-8-89176-360-9",
abstract = "For this study, Whisper, an automatic speech recognition software, was fine-tuned on Kildin Sami, an endangered and low-resource Uralic language, using an automatic speech recognition-tailored dataset of less than 30 minutes. Three different Whisper models were trained with this dataset{---}each one with a different base language (English, Finnish, or Russian){---}to examine which model provided the best result. Results were measured using Word Error Rate; fine-tuning the Russian-base Whisper model resulted in the lowest Word Error Rate at 68.55{\%}. While still high, this result is impressive for only a small amount of language-specific training data, and the training process yielded insights relevant for potential for further work."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gamboni-2025-fine">
<titleInfo>
<title>Fine-Tuning Whisper for Kildin Sami</title>
</titleInfo>
<name type="personal">
<namePart type="given">Enzo</namePart>
<namePart type="family">Gamboni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th International Workshop on Computational Linguistics for Uralic Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Rießler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eiaki</namePart>
<namePart type="given">V</namePart>
<namePart type="family">Morooka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lev</namePart>
<namePart type="family">Kharlashkin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Joensuu, Finland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-360-9</identifier>
</relatedItem>
<abstract>For this study, Whisper, an automatic speech recognition software, was fine-tuned on Kildin Sami, an endangered and low-resource Uralic language, using an automatic speech recognition-tailored dataset of less than 30 minutes. Three different Whisper models were trained with this dataset—each one with a different base language (English, Finnish, or Russian)—to examine which model provided the best result. Results were measured using Word Error Rate; fine-tuning the Russian-base Whisper model resulted in the lowest Word Error Rate at 68.55%. While still high, this result is impressive for only a small amount of language-specific training data, and the training process yielded insights relevant for potential for further work.</abstract>
<identifier type="citekey">gamboni-2025-fine</identifier>
<location>
<url>https://aclanthology.org/2025.iwclul-1.13/</url>
</location>
<part>
<date>2025-12</date>
<extent unit="page">
<start>106</start>
<end>111</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Fine-Tuning Whisper for Kildin Sami
%A Gamboni, Enzo
%Y Hämäläinen, Mika
%Y Rießler, Michael
%Y Morooka, Eiaki V.
%Y Kharlashkin, Lev
%S Proceedings of the 10th International Workshop on Computational Linguistics for Uralic Languages
%D 2025
%8 December
%I Association for Computational Linguistics
%C Joensuu, Finland
%@ 979-8-89176-360-9
%F gamboni-2025-fine
%X For this study, Whisper, an automatic speech recognition software, was fine-tuned on Kildin Sami, an endangered and low-resource Uralic language, using an automatic speech recognition-tailored dataset of less than 30 minutes. Three different Whisper models were trained with this dataset—each one with a different base language (English, Finnish, or Russian)—to examine which model provided the best result. Results were measured using Word Error Rate; fine-tuning the Russian-base Whisper model resulted in the lowest Word Error Rate at 68.55%. While still high, this result is impressive for only a small amount of language-specific training data, and the training process yielded insights relevant for potential for further work.
%U https://aclanthology.org/2025.iwclul-1.13/
%P 106-111
Markdown (Informal)
[Fine-Tuning Whisper for Kildin Sami](https://aclanthology.org/2025.iwclul-1.13/) (Gamboni, IWCLUL 2025)
ACL
- Enzo Gamboni. 2025. Fine-Tuning Whisper for Kildin Sami. In Proceedings of the 10th International Workshop on Computational Linguistics for Uralic Languages, pages 106–111, Joensuu, Finland. Association for Computational Linguistics.