@inproceedings{liu-etal-2025-study,
title = "A Study on a Low-Resource Speech Recognition System for {T}aiwan {H}akka Based on Whisper and {L}o{RA}",
author = "Liu, Zheng-Ting and
Wang, Heng-You and
Liao, Yi-Xiang and
Qiu, Zhong-Yuan and
Huang, Zhao-Yi",
editor = "Chang, Kai-Wei and
Lu, Ke-Han and
Yang, Chih-Kai and
Tam, Zhi-Rui and
Chang, Wen-Yu and
Wang, Chung-Che",
booktitle = "Proceedings of the 37th Conference on Computational Linguistics and Speech Processing (ROCLING 2025)",
month = nov,
year = "2025",
address = "National Taiwan University, Taipei City, Taiwan",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.rocling-main.54/",
pages = "459--466",
isbn = "979-8-89176-379-1",
abstract = "This study presents the development of a high-performance automatic speech recognition (ASR) system for Taiwan Hakka, a low-resource language facing challenges in preservation and digitalization. We adopt OpenAI{'}s Whisper large-v3-taiwanese-hakka as the foundation, leveraging its advanced Transformer encoder{--}decoder architecture. To achieve parameter efficiency and adaptability to a new language, we employ the Low-Rank Adaptation (LoRA) fine-tuning strategy, targeting key modules including q{\_}proj, k{\_}proj, v{\_}proj, out{\_}proj, fc1, and fc2. Experimental results demonstrate that the fine-tuned model achieves strong performance on the FSR 2025 HAT-Vol2 test set, with an average character error rate (CER) of 7.07{\%} and an average word error rate (WER) of 40.99{\%}. Training analysis further indicates that both validation loss and error rates consistently decreased and converged, confirming that LoRA enables effective knowledge transfer to Hakka ASR without catastrophic forgetting. These findings provide an efficient and practical solution for speech recognition in low-resource languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liu-etal-2025-study">
<titleInfo>
<title>A Study on a Low-Resource Speech Recognition System for Taiwan Hakka Based on Whisper and LoRA</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zheng-Ting</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng-You</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi-Xiang</namePart>
<namePart type="family">Liao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhong-Yuan</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhao-Yi</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 37th Conference on Computational Linguistics and Speech Processing (ROCLING 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kai-Wei</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ke-Han</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chih-Kai</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhi-Rui</namePart>
<namePart type="family">Tam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wen-Yu</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chung-Che</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">National Taiwan University, Taipei City, Taiwan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-379-1</identifier>
</relatedItem>
<abstract>This study presents the development of a high-performance automatic speech recognition (ASR) system for Taiwan Hakka, a low-resource language facing challenges in preservation and digitalization. We adopt OpenAI’s Whisper large-v3-taiwanese-hakka as the foundation, leveraging its advanced Transformer encoder–decoder architecture. To achieve parameter efficiency and adaptability to a new language, we employ the Low-Rank Adaptation (LoRA) fine-tuning strategy, targeting key modules including q_proj, k_proj, v_proj, out_proj, fc1, and fc2. Experimental results demonstrate that the fine-tuned model achieves strong performance on the FSR 2025 HAT-Vol2 test set, with an average character error rate (CER) of 7.07% and an average word error rate (WER) of 40.99%. Training analysis further indicates that both validation loss and error rates consistently decreased and converged, confirming that LoRA enables effective knowledge transfer to Hakka ASR without catastrophic forgetting. These findings provide an efficient and practical solution for speech recognition in low-resource languages.</abstract>
<identifier type="citekey">liu-etal-2025-study</identifier>
<location>
<url>https://aclanthology.org/2025.rocling-main.54/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>459</start>
<end>466</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Study on a Low-Resource Speech Recognition System for Taiwan Hakka Based on Whisper and LoRA
%A Liu, Zheng-Ting
%A Wang, Heng-You
%A Liao, Yi-Xiang
%A Qiu, Zhong-Yuan
%A Huang, Zhao-Yi
%Y Chang, Kai-Wei
%Y Lu, Ke-Han
%Y Yang, Chih-Kai
%Y Tam, Zhi-Rui
%Y Chang, Wen-Yu
%Y Wang, Chung-Che
%S Proceedings of the 37th Conference on Computational Linguistics and Speech Processing (ROCLING 2025)
%D 2025
%8 November
%I Association for Computational Linguistics
%C National Taiwan University, Taipei City, Taiwan
%@ 979-8-89176-379-1
%F liu-etal-2025-study
%X This study presents the development of a high-performance automatic speech recognition (ASR) system for Taiwan Hakka, a low-resource language facing challenges in preservation and digitalization. We adopt OpenAI’s Whisper large-v3-taiwanese-hakka as the foundation, leveraging its advanced Transformer encoder–decoder architecture. To achieve parameter efficiency and adaptability to a new language, we employ the Low-Rank Adaptation (LoRA) fine-tuning strategy, targeting key modules including q_proj, k_proj, v_proj, out_proj, fc1, and fc2. Experimental results demonstrate that the fine-tuned model achieves strong performance on the FSR 2025 HAT-Vol2 test set, with an average character error rate (CER) of 7.07% and an average word error rate (WER) of 40.99%. Training analysis further indicates that both validation loss and error rates consistently decreased and converged, confirming that LoRA enables effective knowledge transfer to Hakka ASR without catastrophic forgetting. These findings provide an efficient and practical solution for speech recognition in low-resource languages.
%U https://aclanthology.org/2025.rocling-main.54/
%P 459-466
Markdown (Informal)
[A Study on a Low-Resource Speech Recognition System for Taiwan Hakka Based on Whisper and LoRA](https://aclanthology.org/2025.rocling-main.54/) (Liu et al., ROCLING 2025)
ACL