@comment{Conference-paper record for the ROCLING 2025 proceedings; the url field points at the ACL Anthology page.
  NOTE(review): the address field looks like the conference venue rather than a publisher city -- confirm before using it with styles that print address next to publisher.
  Case-sensitive words in title and booktitle are brace-protected as whole words so sentence-casing styles keep them intact.}
@inproceedings{hu-etal-2025-multi-module,
  title     = {A Multi-Module Error Detection and Correction System for {Hakka} {ASR}},
  author    = {Hu, Min-Chun and
               Xiao, Yu-Lin and
               Lu, Wen-Hsiang},
  editor    = {Chang, Kai-Wei and
               Lu, Ke-Han and
               Yang, Chih-Kai and
               Tam, Zhi-Rui and
               Chang, Wen-Yu and
               Wang, Chung-Che},
  booktitle = {Proceedings of the 37th Conference on Computational Linguistics and Speech Processing ({ROCLING} 2025)},
  month     = nov,
  year      = {2025},
  address   = {National Taiwan University, Taipei City, Taiwan},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.rocling-main.58/},
  pages     = {481--488},
  isbn      = {979-8-89176-379-1},
  abstract  = {本研究提出一個針對客語(以大埔/詔安腔為主)的自動語音辨識(ASR)後矯正系統,旨在解決低資源語言辨識錯誤率偏高的問題。客語因受限於語料規模、異體字與腔調差異,在既有的通用 ASR 模型上表現往往不佳。為此,我們首先以 Whisper Large v3 Turbo 為基底辨識模型,使用約 60 小時的大埔與詔安語料進行微調,以提升對特定腔調的適應性。在獲取 ASR N-best 候選句後,系統進一步透過多模組錯誤偵測矯正流程進行修正,包含四個主要步驟: (1) 潛在錯誤偵測,用於鎖定候選間錯誤的候選詞彙;(2) 音素混淆集偵測(Phoneme Confusion Set): 依據音素相近關係提供可能替代詞;(3) 辭典(Lexicon)修正: 確保詞彙存在於語言使用的實際範疇中,(4) 搭配詞關聯度偵測: 利用收集之語料所建立的搭配詞關聯度來偵測錯誤詞彙。本研究所提出的矯正機制能有效補足 ASR 在低資源語言中的不足,實驗顯示經過多階段錯誤偵測矯正後,最終CER減少至 15.49{\%},減少 2.14 {\%} ,證明該方法能有效提升語音辨識的準確率。}
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey hu-etal-2025-multi-module:
     title, three authors, host proceedings (editors, publisher, place, ISBN),
     abstract, URL and page range. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="hu-etal-2025-multi-module">
    <titleInfo>
      <title>A Multi-Module Error Detection and Correction System for Hakka ASR</title>
    </titleInfo>
    <!-- Authors of the paper, in listed order. -->
    <name type="personal">
      <namePart type="given">Min-Chun</namePart>
      <namePart type="family">Hu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yu-Lin</namePart>
      <namePart type="family">Xiao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Wen-Hsiang</namePart>
      <namePart type="family">Lu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and imprint. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 37th Conference on Computational Linguistics and Speech Processing (ROCLING 2025)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Kai-Wei</namePart>
        <namePart type="family">Chang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ke-Han</namePart>
        <namePart type="family">Lu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Chih-Kai</namePart>
        <namePart type="family">Yang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Zhi-Rui</namePart>
        <namePart type="family">Tam</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Wen-Yu</namePart>
        <namePart type="family">Chang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Chung-Che</namePart>
        <namePart type="family">Wang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">National Taiwan University, Taipei City, Taiwan</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-379-1</identifier>
    </relatedItem>
    <abstract>本研究提出一個針對客語(以大埔/詔安腔為主)的自動語音辨識(ASR)後矯正系統,旨在解決低資源語言辨識錯誤率偏高的問題。客語因受限於語料規模、異體字與腔調差異,在既有的通用 ASR 模型上表現往往不佳。為此,我們首先以 Whisper Large v3 Turbo 為基底辨識模型,使用約 60 小時的大埔與詔安語料進行微調,以提升對特定腔調的適應性。在獲取 ASR N-best 候選句後,系統進一步透過多模組錯誤偵測矯正流程進行修正,包含四個主要步驟: (1) 潛在錯誤偵測,用於鎖定候選間錯誤的候選詞彙;(2) 音素混淆集偵測(Phoneme Confusion Set): 依據音素相近關係提供可能替代詞;(3) 辭典(Lexicon)修正: 確保詞彙存在於語言使用的實際範疇中,(4) 搭配詞關聯度偵測: 利用收集之語料所建立的搭配詞關聯度來偵測錯誤詞彙。本研究所提出的矯正機制能有效補足 ASR 在低資源語言中的不足,實驗顯示經過多階段錯誤偵測矯正後,最終CER減少至 15.49%,減少 2.14 % ,證明該方法能有效提升語音辨識的準確率。</abstract>
    <identifier type="citekey">hu-etal-2025-multi-module</identifier>
    <location>
      <url>https://aclanthology.org/2025.rocling-main.58/</url>
    </location>
    <!-- Position of the paper inside the host volume. -->
    <part>
      <date>2025-11</date>
      <extent unit="page">
        <start>481</start>
        <end>488</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Multi-Module Error Detection and Correction System for Hakka ASR
%A Hu, Min-Chun
%A Xiao, Yu-Lin
%A Lu, Wen-Hsiang
%Y Chang, Kai-Wei
%Y Lu, Ke-Han
%Y Yang, Chih-Kai
%Y Tam, Zhi-Rui
%Y Chang, Wen-Yu
%Y Wang, Chung-Che
%S Proceedings of the 37th Conference on Computational Linguistics and Speech Processing (ROCLING 2025)
%D 2025
%8 November
%I Association for Computational Linguistics
%C National Taiwan University, Taipei City, Taiwan
%@ 979-8-89176-379-1
%F hu-etal-2025-multi-module
%X 本研究提出一個針對客語(以大埔/詔安腔為主)的自動語音辨識(ASR)後矯正系統,旨在解決低資源語言辨識錯誤率偏高的問題。客語因受限於語料規模、異體字與腔調差異,在既有的通用 ASR 模型上表現往往不佳。為此,我們首先以 Whisper Large v3 Turbo 為基底辨識模型,使用約 60 小時的大埔與詔安語料進行微調,以提升對特定腔調的適應性。在獲取 ASR N-best 候選句後,系統進一步透過多模組錯誤偵測矯正流程進行修正,包含四個主要步驟: (1) 潛在錯誤偵測,用於鎖定候選間錯誤的候選詞彙;(2) 音素混淆集偵測(Phoneme Confusion Set): 依據音素相近關係提供可能替代詞;(3) 辭典(Lexicon)修正: 確保詞彙存在於語言使用的實際範疇中,(4) 搭配詞關聯度偵測: 利用收集之語料所建立的搭配詞關聯度來偵測錯誤詞彙。本研究所提出的矯正機制能有效補足 ASR 在低資源語言中的不足,實驗顯示經過多階段錯誤偵測矯正後,最終CER減少至 15.49%,減少 2.14 % ,證明該方法能有效提升語音辨識的準確率。
%U https://aclanthology.org/2025.rocling-main.58/
%P 481-488
Markdown (Informal)
[A Multi-Module Error Detection and Correction System for Hakka ASR](https://aclanthology.org/2025.rocling-main.58/) (Hu et al., ROCLING 2025)
ACL
- Min-Chun Hu, Yu-Lin Xiao, and Wen-Hsiang Lu. 2025. A Multi-Module Error Detection and Correction System for Hakka ASR. In Proceedings of the 37th Conference on Computational Linguistics and Speech Processing (ROCLING 2025), pages 481–488, National Taiwan University, Taipei City, Taiwan. Association for Computational Linguistics.