@inproceedings{rupak-raj-etal-2023-pronunciation,
title = "Pronunciation-Aware Syllable Tokenizer for {N}epali Automatic Speech Recognition System",
author = "Ghimire, Rupak Raj and
Bal, Bal Krishna and
Prasain, Balaram and
Poudyal, Prakash",
editor = "Pawar, Jyoti D. and
Lalitha Devi, Sobha",
booktitle = "Proceedings of the 20th International Conference on Natural Language Processing (ICON)",
month = dec,
year = "2023",
address = "Goa University, Goa, India",
publisher = "NLP Association of India (NLPAI)",
url = "https://aclanthology.org/2023.icon-1.4/",
pages = "36--43",
abstract = "The Automatic Speech Recognition (ASR) has come up with significant advancements over the course of several decades, transitioning from a rule-based method to a statistical approach, and ultimately to the use of end-to-end (E2E) frameworks. This phenomenon continues with the progression of machine learning and deep learning methodologies. The E2E approach for ASR has demonstrated predominant success in the case of resourceful languages with larger annotated corpus. However, the accuracy is quite low for low-resourced languages such as Nepali. In this regard, language-specific tools such as tokenizers seem to play a vital role in improving the performance of the E2E model for low-resourced languages like Nepali. In this paper, we propose a pronunciation-aware syllable tokenizer for the Nepali language which improves the results of the E2E model. Our experiments confirm that the introduction of the proposed tokenizer yields better performance with the Character Error Rate (CER) 8.09{\%} compared to other language-independent tokenizers."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rupak-raj-etal-2023-pronunciation">
<titleInfo>
<title>Pronunciation-Aware Syllable Tokenizer for Nepali Automatic Speech Recognition System</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rupak</namePart>
<namePart type="given">Raj</namePart>
<namePart type="family">Ghimire</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bal</namePart>
<namePart type="given">Krishna</namePart>
<namePart type="family">Bal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Balaram</namePart>
<namePart type="family">Prasain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Prakash</namePart>
<namePart type="family">Poudyal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th International Conference on Natural Language Processing (ICON)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jyoti</namePart>
<namePart type="given">D.</namePart>
<namePart type="family">Pawar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sobha</namePart>
<namePart type="family">Lalitha Devi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>NLP Association of India (NLPAI)</publisher>
<place>
<placeTerm type="text">Goa University, Goa, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Automatic Speech Recognition (ASR) has come up with significant advancements over the course of several decades, transitioning from a rule-based method to a statistical approach, and ultimately to the use of end-to-end (E2E) frameworks. This phenomenon continues with the progression of machine learning and deep learning methodologies. The E2E approach for ASR has demonstrated predominant success in the case of resourceful languages with larger annotated corpus. However, the accuracy is quite low for low-resourced languages such as Nepali. In this regard, language-specific tools such as tokenizers seem to play a vital role in improving the performance of the E2E model for low-resourced languages like Nepali. In this paper, we propose a pronunciation-aware syllable tokenizer for the Nepali language which improves the results of the E2E model. Our experiments confirm that the introduction of the proposed tokenizer yields better performance with the Character Error Rate (CER) 8.09% compared to other language-independent tokenizers.</abstract>
<identifier type="citekey">rupak-raj-etal-2023-pronunciation</identifier>
<location>
<url>https://aclanthology.org/2023.icon-1.4/</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>36</start>
<end>43</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Pronunciation-Aware Syllable Tokenizer for Nepali Automatic Speech Recognition System
%A Ghimire, Rupak Raj
%A Bal, Bal Krishna
%A Prasain, Balaram
%A Poudyal, Prakash
%Y Pawar, Jyoti D.
%Y Lalitha Devi, Sobha
%S Proceedings of the 20th International Conference on Natural Language Processing (ICON)
%D 2023
%8 December
%I NLP Association of India (NLPAI)
%C Goa University, Goa, India
%F rupak-raj-etal-2023-pronunciation
%X The Automatic Speech Recognition (ASR) has come up with significant advancements over the course of several decades, transitioning from a rule-based method to a statistical approach, and ultimately to the use of end-to-end (E2E) frameworks. This phenomenon continues with the progression of machine learning and deep learning methodologies. The E2E approach for ASR has demonstrated predominant success in the case of resourceful languages with larger annotated corpus. However, the accuracy is quite low for low-resourced languages such as Nepali. In this regard, language-specific tools such as tokenizers seem to play a vital role in improving the performance of the E2E model for low-resourced languages like Nepali. In this paper, we propose a pronunciation-aware syllable tokenizer for the Nepali language which improves the results of the E2E model. Our experiments confirm that the introduction of the proposed tokenizer yields better performance with the Character Error Rate (CER) 8.09% compared to other language-independent tokenizers.
%U https://aclanthology.org/2023.icon-1.4/
%P 36-43
Markdown (Informal)
[Pronunciation-Aware Syllable Tokenizer for Nepali Automatic Speech Recognition System](https://aclanthology.org/2023.icon-1.4/) (Ghimire et al., ICON 2023)
ACL