@inproceedings{pantha-etal-2025-speech,
title = "Speech Personalization using Parameter Efficient Fine-Tuning for {N}epali Speakers",
author = "Pantha, Kiran and
Ghimire, Rupak Raj and
Bal, Bal Krishna",
editor = "Gkirtzou, Katerina and
{\v{Z}}itnik, Slavko and
Gracia, Jorge and
Gromann, Dagmar and
di Buono, Maria Pia and
Monti, Johanna and
Ionov, Maxim",
booktitle = "Proceedings of the 5th Conference on Language, Data and Knowledge: Fifth Workshop on Language Technology for Equality, Diversity, Inclusion",
month = sep,
year = "2025",
address = "Naples, Italy",
publisher = "Unior Press",
url = "https://aclanthology.org/2025.ltedi-1.31/",
pages = "190--199",
ISBN = "978-88-6719-334-9",
abstract = "The performance of Automatic Speech Recognition (ASR) systems has improved significantly, driven by advancements in large-scale pre-trained models. However, adapting such models to low-resource languages such as Nepali is challenging due to the lack of labeled data and computational resources. Additionally, adapting the unique speech parameters of the speaker to a model is also a challenging task. Personalization helps to target the model to fit the particular speaker. This work investigates parameter-efficient fine-tuning (PEFT) methods like Low-Rank Adaptation (LoRA) and Decomposed Weight Low-Rank Adaptation (DoRA) to improve the performance of fine-tuned Whisper ASR models for Nepali ASR tasks by Personalization. These experiments demonstrate that the PEFT methods obtain competitive results while significantly reducing the number of trainable parameters compared to full fine-tuning. LoRA and DoRA show a relative WER to $FT_{Base}$ increment of 34.93{\%} and 36.79{\%}, respectively, and a relative CER to $FT_{Base}$ increment of 49.50{\%} and 50.03{\%}, respectively. Furthermore, the results highlight a 99.74{\%} reduction in total training parameters."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pantha-etal-2025-speech">
<titleInfo>
<title>Speech Personalization using Parameter Efficient Fine-Tuning for Nepali Speakers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kiran</namePart>
<namePart type="family">Pantha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rupak</namePart>
<namePart type="given">Raj</namePart>
<namePart type="family">Ghimire</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bal</namePart>
<namePart type="given">Krishna</namePart>
<namePart type="family">Bal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Conference on Language, Data and Knowledge: Fifth Workshop on Language Technology for Equality, Diversity, Inclusion</title>
</titleInfo>
<name type="personal">
<namePart type="given">Katerina</namePart>
<namePart type="family">Gkirtzou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Slavko</namePart>
<namePart type="family">Žitnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jorge</namePart>
<namePart type="family">Gracia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dagmar</namePart>
<namePart type="family">Gromann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Pia</namePart>
<namePart type="family">di Buono</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johanna</namePart>
<namePart type="family">Monti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maxim</namePart>
<namePart type="family">Ionov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Unior Press</publisher>
<place>
<placeTerm type="text">Naples, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">978-88-6719-334-9</identifier>
</relatedItem>
<abstract>The performance of Automatic Speech Recognition (ASR) systems has improved significantly, driven by advancements in large-scale pre-trained models. However, adapting such models to low-resource languages such as Nepali is challenging due to the lack of labeled data and computational resources. Additionally, adapting the unique speech parameters of the speaker to a model is also a challenging task. Personalization helps to target the model to fit the particular speaker. This work investigates parameter-efficient fine-tuning (PEFT) methods like Low-Rank Adaptation (LoRA) and Decomposed Weight Low-Rank Adaptation (DoRA) to improve the performance of fine-tuned Whisper ASR models for Nepali ASR tasks by Personalization. These experiments demonstrate that the PEFT methods obtain competitive results while significantly reducing the number of trainable parameters compared to full fine-tuning. LoRA and DoRA show a relative WER to FT_Base increment of 34.93% and 36.79%, respectively, and a relative CER to FT_Base increment of 49.50% and 50.03%, respectively. Furthermore, the results highlight a 99.74% reduction in total training parameters.</abstract>
<identifier type="citekey">pantha-etal-2025-speech</identifier>
<location>
<url>https://aclanthology.org/2025.ltedi-1.31/</url>
</location>
<part>
<date>2025-09</date>
<extent unit="page">
<start>190</start>
<end>199</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Speech Personalization using Parameter Efficient Fine-Tuning for Nepali Speakers
%A Pantha, Kiran
%A Ghimire, Rupak Raj
%A Bal, Bal Krishna
%Y Gkirtzou, Katerina
%Y Žitnik, Slavko
%Y Gracia, Jorge
%Y Gromann, Dagmar
%Y di Buono, Maria Pia
%Y Monti, Johanna
%Y Ionov, Maxim
%S Proceedings of the 5th Conference on Language, Data and Knowledge: Fifth Workshop on Language Technology for Equality, Diversity, Inclusion
%D 2025
%8 September
%I Unior Press
%C Naples, Italy
%@ 978-88-6719-334-9
%F pantha-etal-2025-speech
%X The performance of Automatic Speech Recognition (ASR) systems has improved significantly, driven by advancements in large-scale pre-trained models. However, adapting such models to low-resource languages such as Nepali is challenging due to the lack of labeled data and computational resources. Additionally, adapting the unique speech parameters of the speaker to a model is also a challenging task. Personalization helps to target the model to fit the particular speaker. This work investigates parameter-efficient fine-tuning (PEFT) methods like Low-Rank Adaptation (LoRA) and Decomposed Weight Low-Rank Adaptation (DoRA) to improve the performance of fine-tuned Whisper ASR models for Nepali ASR tasks by Personalization. These experiments demonstrate that the PEFT methods obtain competitive results while significantly reducing the number of trainable parameters compared to full fine-tuning. LoRA and DoRA show a relative WER to FT_Base increment of 34.93% and 36.79%, respectively, and a relative CER to FT_Base increment of 49.50% and 50.03%, respectively. Furthermore, the results highlight a 99.74% reduction in total training parameters.
%U https://aclanthology.org/2025.ltedi-1.31/
%P 190-199
Markdown (Informal)
[Speech Personalization using Parameter Efficient Fine-Tuning for Nepali Speakers](https://aclanthology.org/2025.ltedi-1.31/) (Pantha et al., LTEDI 2025)
ACL
Kiran Pantha, Rupak Raj Ghimire, and Bal Krishna Bal. 2025. Speech Personalization using Parameter Efficient Fine-Tuning for Nepali Speakers. In Proceedings of the 5th Conference on Language, Data and Knowledge: Fifth Workshop on Language Technology for Equality, Diversity, Inclusion, pages 190–199, Naples, Italy. Unior Press.
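The abstract describes attaching LoRA (and its weight-decomposed variant, DoRA) adapters to a pre-trained Whisper model so that only a small fraction of parameters is trained. The snippet below is a minimal illustrative sketch of that general setup using the Hugging Face `transformers` and `peft` libraries; it is not the authors' implementation, and the checkpoint name, rank, scaling factor, and target modules are assumed values chosen for illustration.

```python
# Illustrative sketch: wrapping a pre-trained Whisper model with a LoRA adapter
# using Hugging Face `transformers` + `peft`. All hyperparameters below are
# assumptions for illustration, not the settings reported in the paper.
from transformers import WhisperForConditionalGeneration
from peft import LoraConfig, get_peft_model

# Load a pre-trained Whisper checkpoint (the "small" variant is an assumption).
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")

# LoRA configuration: low-rank updates on the attention projection matrices.
# In recent versions of peft, passing use_dora=True switches the adapter to
# DoRA (weight-decomposed LoRA) with the same interface.
lora_config = LoraConfig(
    r=16,                                   # rank of the low-rank update
    lora_alpha=32,                          # scaling factor for the update
    target_modules=["q_proj", "v_proj"],    # attention projections to adapt
    lora_dropout=0.05,
)

# Only the adapter weights are trainable; the base model stays frozen, which is
# where the large reduction in trainable parameters comes from.
peft_model = get_peft_model(model, lora_config)
peft_model.print_trainable_parameters()
```

The resulting `peft_model` can then be passed to a standard fine-tuning loop (for example, a `transformers` `Seq2SeqTrainer`) on a speaker's recordings, with only the adapter weights updated.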