@inproceedings{r-etal-2024-cen,
title = "{CEN}{\_}{A}mrita@{LT}-{EDI} 2024: A Transformer based Speech Recognition System for Vulnerable Individuals in {T}amil",
author = "R, Jairam and
G, Jyothish and
B, Premjith and
M, Viswa",
editor = {Chakravarthi, Bharathi Raja and
B, Bharathi and
Buitelaar, Paul and
Durairaj, Thenmozhi and
Kov{\'a}cs, Gy{\"o}rgy and
Garc{\'\i}a Cumbreras, Miguel {\'A}ngel},
booktitle = "Proceedings of the Fourth Workshop on Language Technology for Equality, Diversity, Inclusion",
month = mar,
year = "2024",
address = "St. Julian's, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.ltedi-1.21",
pages = "190--195",
abstract = "Speech recognition is known to be a specialized application of speech processing. Automatic speech recognition (ASR) systems are designed to perform the speech-to-text task. Although ASR systems have been the subject of extensive research, they still encounter certain challenges when speech variations arise. The speaker{'}s age, gender, vulnerability, and other factors are the main causes of the variations in speech. In this work, we propose a fine-tuned speech recognition model for recognising the spoken words of vulnerable individuals in Tamil. This research utilizes a dataset sourced from the LT-EDI@EACL2024 shared task. We trained and tested pre-trained ASR models, including XLS-R and Whisper. The findings highlight that the fine-tuned Whisper ASR model surpasses the XLSR, achieving a word error rate (WER) of 24.452, signifying its superior performance in recognizing speech from diverse individuals.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="r-etal-2024-cen">
<titleInfo>
<title>CEN_Amrita@LT-EDI 2024: A Transformer based Speech Recognition System for Vulnerable Individuals in Tamil</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jairam</namePart>
<namePart type="family">R</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jyothish</namePart>
<namePart type="family">G</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Premjith</namePart>
<namePart type="family">B</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viswa</namePart>
<namePart type="family">M</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Language Technology for Equality, Diversity, Inclusion</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">Raja</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="family">B</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Buitelaar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thenmozhi</namePart>
<namePart type="family">Durairaj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">György</namePart>
<namePart type="family">Kovács</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miguel</namePart>
<namePart type="given">Ángel</namePart>
<namePart type="family">García Cumbreras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julian’s, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Speech recognition is known to be a specialized application of speech processing. Automatic speech recognition (ASR) systems are designed to perform the speech-to-text task. Although ASR systems have been the subject of extensive research, they still encounter certain challenges when speech variations arise. The speaker’s age, gender, vulnerability, and other factors are the main causes of the variations in speech. In this work, we propose a fine-tuned speech recognition model for recognising the spoken words of vulnerable individuals in Tamil. This research utilizes a dataset sourced from the LT-EDI@EACL2024 shared task. We trained and tested pre-trained ASR models, including XLS-R and Whisper. The findings highlight that the fine-tuned Whisper ASR model surpasses the XLSR, achieving a word error rate (WER) of 24.452, signifying its superior performance in recognizing speech from diverse individuals.</abstract>
<identifier type="citekey">r-etal-2024-cen</identifier>
<location>
<url>https://aclanthology.org/2024.ltedi-1.21</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>190</start>
<end>195</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CEN_Amrita@LT-EDI 2024: A Transformer based Speech Recognition System for Vulnerable Individuals in Tamil
%A R, Jairam
%A G, Jyothish
%A B, Premjith
%A M, Viswa
%Y Chakravarthi, Bharathi Raja
%Y B, Bharathi
%Y Buitelaar, Paul
%Y Durairaj, Thenmozhi
%Y Kovács, György
%Y García Cumbreras, Miguel Ángel
%S Proceedings of the Fourth Workshop on Language Technology for Equality, Diversity, Inclusion
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F r-etal-2024-cen
%X Speech recognition is known to be a specialized application of speech processing. Automatic speech recognition (ASR) systems are designed to perform the speech-to-text task. Although ASR systems have been the subject of extensive research, they still encounter certain challenges when speech variations arise. The speaker’s age, gender, vulnerability, and other factors are the main causes of the variations in speech. In this work, we propose a fine-tuned speech recognition model for recognising the spoken words of vulnerable individuals in Tamil. This research utilizes a dataset sourced from the LT-EDI@EACL2024 shared task. We trained and tested pre-trained ASR models, including XLS-R and Whisper. The findings highlight that the fine-tuned Whisper ASR model surpasses the XLSR, achieving a word error rate (WER) of 24.452, signifying its superior performance in recognizing speech from diverse individuals.
%U https://aclanthology.org/2024.ltedi-1.21
%P 190-195
Markdown (Informal)
[CEN_Amrita@LT-EDI 2024: A Transformer based Speech Recognition System for Vulnerable Individuals in Tamil](https://aclanthology.org/2024.ltedi-1.21) (R et al., LTEDI-WS 2024)
ACL