@inproceedings{nandi-etal-2023-pseudo,
title = "Pseudo-Labeling for Domain-Agnostic {B}angla Automatic Speech Recognition",
author = "Nandi, Rabindra Nath and
Menon, Mehadi and
Muntasir, Tareq and
Sarker, Sagor and
Muhtaseem, Quazi Sarwar and
Islam, Md. Tariqul and
Chowdhury, Shammur and
Alam, Firoj",
editor = "Alam, Firoj and
Kar, Sudipta and
Chowdhury, Shammur Absar and
Sadeque, Farig and
Amin, Ruhul",
booktitle = "Proceedings of the First Workshop on Bangla Language Processing (BLP-2023)",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.banglalp-1.16",
doi = "10.18653/v1/2023.banglalp-1.16",
pages = "152--162",
abstract = "One of the major challenges for developing automatic speech recognition (ASR) for low-resource languages is the limited access to labeled data with domain-specific variations. In this study, we propose a pseudo-labeling approach to develop a large-scale domain-agnostic ASR dataset. With the proposed methodology, we developed a 20k+ hours labeled Bangla speech dataset covering diverse topics, speaking styles, dialects, noisy environments, and conversational scenarios. We then exploited the developed corpus to design a conformer-based ASR system. We benchmarked the trained ASR with publicly available datasets and compared it with other available models. To investigate the efficacy, we designed and developed a human-annotated domain-agnostic test set composed of news, telephony, and conversational data among others. Our results demonstrate the efficacy of the model trained on pseudo-label data for the designed test-set along with publicly-available Bangla datasets. The experimental resources will be publicly available. https://github.com/hishab-nlp/Pseudo-Labeling-for-Domain-Agnostic-Bangla-ASR",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nandi-etal-2023-pseudo">
<titleInfo>
<title>Pseudo-Labeling for Domain-Agnostic Bangla Automatic Speech Recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rabindra</namePart>
<namePart type="given">Nath</namePart>
<namePart type="family">Nandi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mehadi</namePart>
<namePart type="family">Menon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tareq</namePart>
<namePart type="family">Muntasir</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sagor</namePart>
<namePart type="family">Sarker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Quazi</namePart>
<namePart type="given">Sarwar</namePart>
<namePart type="family">Muhtaseem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Md.</namePart>
<namePart type="given">Tariqul</namePart>
<namePart type="family">Islam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shammur</namePart>
<namePart type="family">Chowdhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Firoj</namePart>
<namePart type="family">Alam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Bangla Language Processing (BLP-2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Firoj</namePart>
<namePart type="family">Alam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sudipta</namePart>
<namePart type="family">Kar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shammur</namePart>
<namePart type="given">Absar</namePart>
<namePart type="family">Chowdhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Farig</namePart>
<namePart type="family">Sadeque</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruhul</namePart>
<namePart type="family">Amin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>One of the major challenges for developing automatic speech recognition (ASR) for low-resource languages is the limited access to labeled data with domain-specific variations. In this study, we propose a pseudo-labeling approach to develop a large-scale domain-agnostic ASR dataset. With the proposed methodology, we developed a 20k+ hours labeled Bangla speech dataset covering diverse topics, speaking styles, dialects, noisy environments, and conversational scenarios. We then exploited the developed corpus to design a conformer-based ASR system. We benchmarked the trained ASR with publicly available datasets and compared it with other available models. To investigate the efficacy, we designed and developed a human-annotated domain-agnostic test set composed of news, telephony, and conversational data among others. Our results demonstrate the efficacy of the model trained on pseudo-label data for the designed test-set along with publicly-available Bangla datasets. The experimental resources will be publicly available. https://github.com/hishab-nlp/Pseudo-Labeling-for-Domain-Agnostic-Bangla-ASR</abstract>
<identifier type="citekey">nandi-etal-2023-pseudo</identifier>
<identifier type="doi">10.18653/v1/2023.banglalp-1.16</identifier>
<location>
<url>https://aclanthology.org/2023.banglalp-1.16</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>152</start>
<end>162</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Pseudo-Labeling for Domain-Agnostic Bangla Automatic Speech Recognition
%A Nandi, Rabindra Nath
%A Menon, Mehadi
%A Muntasir, Tareq
%A Sarker, Sagor
%A Muhtaseem, Quazi Sarwar
%A Islam, Md. Tariqul
%A Chowdhury, Shammur
%A Alam, Firoj
%Y Alam, Firoj
%Y Kar, Sudipta
%Y Chowdhury, Shammur Absar
%Y Sadeque, Farig
%Y Amin, Ruhul
%S Proceedings of the First Workshop on Bangla Language Processing (BLP-2023)
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F nandi-etal-2023-pseudo
%X One of the major challenges for developing automatic speech recognition (ASR) for low-resource languages is the limited access to labeled data with domain-specific variations. In this study, we propose a pseudo-labeling approach to develop a large-scale domain-agnostic ASR dataset. With the proposed methodology, we developed a 20k+ hours labeled Bangla speech dataset covering diverse topics, speaking styles, dialects, noisy environments, and conversational scenarios. We then exploited the developed corpus to design a conformer-based ASR system. We benchmarked the trained ASR with publicly available datasets and compared it with other available models. To investigate the efficacy, we designed and developed a human-annotated domain-agnostic test set composed of news, telephony, and conversational data among others. Our results demonstrate the efficacy of the model trained on pseudo-label data for the designed test-set along with publicly-available Bangla datasets. The experimental resources will be publicly available. https://github.com/hishab-nlp/Pseudo-Labeling-for-Domain-Agnostic-Bangla-ASR
%R 10.18653/v1/2023.banglalp-1.16
%U https://aclanthology.org/2023.banglalp-1.16
%U https://doi.org/10.18653/v1/2023.banglalp-1.16
%P 152-162
Markdown (Informal)
[Pseudo-Labeling for Domain-Agnostic Bangla Automatic Speech Recognition](https://aclanthology.org/2023.banglalp-1.16) (Nandi et al., BanglaLP 2023)
ACL
- Rabindra Nath Nandi, Mehadi Menon, Tareq Muntasir, Sagor Sarker, Quazi Sarwar Muhtaseem, Md. Tariqul Islam, Shammur Chowdhury, and Firoj Alam. 2023. Pseudo-Labeling for Domain-Agnostic Bangla Automatic Speech Recognition. In Proceedings of the First Workshop on Bangla Language Processing (BLP-2023), pages 152–162, Singapore. Association for Computational Linguistics.