@inproceedings{ghimire-etal-2024-improving,
title = "Improving on the Limitations of the {ASR} Model in Low-Resourced Environments Using Parameter-Efficient Fine-Tuning",
author = "Ghimire, Rupak Raj and
Poudyal, Prakash and
Bal, Bal Krishna",
editor = "Lalitha Devi, Sobha and
Arora, Karunesh",
booktitle = "Proceedings of the 21st International Conference on Natural Language Processing (ICON)",
month = dec,
year = "2024",
address = "AU-KBC Research Centre, Chennai, India",
publisher = "NLP Association of India (NLPAI)",
url = "https://aclanthology.org/2024.icon-1.47/",
pages = "408--415",
abstract = "Modern general-purpose speech recognition systems are more robust in languages with high resources. In contrast, achieving state-of-the-art accuracy for low-resource languages is still challenging. The fine-tuning of the pre-trained model is one of the highly popular practices which utilizes the existing information while efficiently learning from a small amount of data to enhance the precision and robustness of speech recognition tasks. This work attempts to diagnose the performance of a pre-trained model when transcribing the audio from the low-resource language. In this work, we apply an adapter-based iterative parameter-efficient fine-tuning strategy on a limited dataset aiming to improve the quality of the transcription of a previously fine-tuned model. For the experiment we used Whisper`s multilingual pre-trained speech model and Nepali as a test language. Using this approach we achieved Word Error Rate of 27.9{\%},which is more than 19{\%} improvement over pre-trained Whisper Large {\ensuremath{-}} V2."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ghimire-etal-2024-improving">
<titleInfo>
<title>Improving on the Limitations of the ASR Model in Low-Resourced Environments Using Parameter-Efficient Fine-Tuning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rupak</namePart>
<namePart type="given">Raj</namePart>
<namePart type="family">Ghimire</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Prakash</namePart>
<namePart type="family">Poudyal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bal</namePart>
<namePart type="given">Krishna</namePart>
<namePart type="family">Bal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 21st International Conference on Natural Language Processing (ICON)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sobha</namePart>
<namePart type="family">Lalitha Devi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Karunesh</namePart>
<namePart type="family">Arora</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>NLP Association of India (NLPAI)</publisher>
<place>
<placeTerm type="text">AU-KBC Research Centre, Chennai, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Modern general-purpose speech recognition systems are more robust in languages with high resources. In contrast, achieving state-of-the-art accuracy for low-resource languages is still challenging. Fine-tuning a pre-trained model is a highly popular practice that utilizes existing information while efficiently learning from a small amount of data to enhance the precision and robustness of speech recognition tasks. This work attempts to diagnose the performance of a pre-trained model when transcribing audio from a low-resource language. We apply an adapter-based iterative parameter-efficient fine-tuning strategy on a limited dataset, aiming to improve the quality of the transcription of a previously fine-tuned model. For the experiment, we used Whisper's multilingual pre-trained speech model and Nepali as a test language. Using this approach, we achieved a Word Error Rate of 27.9%, which is a more than 19% improvement over the pre-trained Whisper Large-V2.</abstract>
<identifier type="citekey">ghimire-etal-2024-improving</identifier>
<location>
<url>https://aclanthology.org/2024.icon-1.47/</url>
</location>
<part>
<date>2024-12</date>
<extent unit="page">
<start>408</start>
<end>415</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Improving on the Limitations of the ASR Model in Low-Resourced Environments Using Parameter-Efficient Fine-Tuning
%A Ghimire, Rupak Raj
%A Poudyal, Prakash
%A Bal, Bal Krishna
%Y Lalitha Devi, Sobha
%Y Arora, Karunesh
%S Proceedings of the 21st International Conference on Natural Language Processing (ICON)
%D 2024
%8 December
%I NLP Association of India (NLPAI)
%C AU-KBC Research Centre, Chennai, India
%F ghimire-etal-2024-improving
%X Modern general-purpose speech recognition systems are more robust in languages with high resources. In contrast, achieving state-of-the-art accuracy for low-resource languages is still challenging. Fine-tuning a pre-trained model is a highly popular practice that utilizes existing information while efficiently learning from a small amount of data to enhance the precision and robustness of speech recognition tasks. This work attempts to diagnose the performance of a pre-trained model when transcribing audio from a low-resource language. We apply an adapter-based iterative parameter-efficient fine-tuning strategy on a limited dataset, aiming to improve the quality of the transcription of a previously fine-tuned model. For the experiment, we used Whisper's multilingual pre-trained speech model and Nepali as a test language. Using this approach, we achieved a Word Error Rate of 27.9%, which is a more than 19% improvement over the pre-trained Whisper Large-V2.
%U https://aclanthology.org/2024.icon-1.47/
%P 408-415
Markdown (Informal)
[Improving on the Limitations of the ASR Model in Low-Resourced Environments Using Parameter-Efficient Fine-Tuning](https://aclanthology.org/2024.icon-1.47/) (Ghimire et al., ICON 2024)
ACL
Rupak Raj Ghimire, Prakash Poudyal, and Bal Krishna Bal. 2024. [Improving on the Limitations of the ASR Model in Low-Resourced Environments Using Parameter-Efficient Fine-Tuning](https://aclanthology.org/2024.icon-1.47/). In Proceedings of the 21st International Conference on Natural Language Processing (ICON), pages 408–415, AU-KBC Research Centre, Chennai, India. NLP Association of India (NLPAI).
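
The abstract describes adapter-based parameter-efficient fine-tuning (PEFT) of Whisper for Nepali. As a rough, non-authoritative illustration of that setup, the sketch below wires LoRA adapters into a Whisper checkpoint with the Hugging Face `transformers` and `peft` libraries; the paper does not state its exact tooling, so the checkpoint name, adapter type, rank, and target modules here are assumptions rather than the authors' reported configuration.

```python
# Minimal sketch: adapter-based PEFT on a Whisper checkpoint.
# Assumptions (not from the paper): Hugging Face `transformers` + `peft`,
# LoRA adapters on the attention projections, rank/alpha values below.
from transformers import WhisperForConditionalGeneration, WhisperProcessor
from peft import LoraConfig, get_peft_model

model_name = "openai/whisper-large-v2"  # base model referenced in the abstract
processor = WhisperProcessor.from_pretrained(
    model_name, language="Nepali", task="transcribe"
)
model = WhisperForConditionalGeneration.from_pretrained(model_name)

# Attach small trainable adapters while keeping the base weights frozen.
lora_config = LoraConfig(
    r=32,                                 # adapter rank (illustrative)
    lora_alpha=64,
    target_modules=["q_proj", "v_proj"],  # attention projections (assumed)
    lora_dropout=0.05,
    bias="none",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only a small fraction of weights train
```

From here, the wrapped model can be fine-tuned on a limited Nepali speech corpus with any standard seq2seq training loop; only the adapter parameters are updated, which is what makes the approach parameter-efficient.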