@inproceedings{lakshminarayanan-prudhommeaux-2024-exploring,
title = "Exploring the impact of noise in low-resource {ASR} for {T}amil",
author = "Lakshminarayanan, Vigneshwar and
Prud{'}hommeaux, Emily",
editor = "Chakravarthi, Bharathi Raja and
Priyadharshini, Ruba and
Madasamy, Anand Kumar and
Thavareesan, Sajeetha and
Sherly, Elizabeth and
Nadarajan, Rajeswari and
Ravikiran, Manikandan",
booktitle = "Proceedings of the Fourth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages",
month = mar,
year = "2024",
address = "St. Julian's, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.dravidianlangtech-1.5",
pages = "30--34",
abstract = "The use of deep learning algorithms has resulted in significant progress in automatic speech recognition (ASR). Robust high-accuracy ASR models typically require thousands or tens of thousands of hours of speech data, but even the strongest models tend fail under noisy conditions. Unsurprisingly, the impact of noise on accuracy is more drastic in low-resource settings. In this paper, we investigate the impact of noise on ASR in a low-resource setting. We explore novel methods for developing noise-robust ASR models using a a small dataset for Tamil, a widely-spoken but under-resourced Dravidian languages. We add various noises to the audio data to determine the impact of different kinds of noise (e.g., punctuated vs. constant, man-made vs natural) We also explore the relationship between different data augmentation methods are better suited to handling different types of noise. Our results show that all noises, regardless of the type, had an impact on ASR performance, and that upgrading the architecture alone could not mitigate the impact of noise. SpecAugment, the most common data augmentation method, was not as helpful as raw data augmentation, in which noise is explicitly added to the training data. Raw data augmentation enhances ASR performance on both clean data and noise-mixed data.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lakshminarayanan-prudhommeaux-2024-exploring">
<titleInfo>
<title>Exploring the impact of noise in low-resource ASR for Tamil</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vigneshwar</namePart>
<namePart type="family">Lakshminarayanan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Prud’hommeaux</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">Raja</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruba</namePart>
<namePart type="family">Priyadharshini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anand</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Madasamy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sajeetha</namePart>
<namePart type="family">Thavareesan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Sherly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rajeswari</namePart>
<namePart type="family">Nadarajan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manikandan</namePart>
<namePart type="family">Ravikiran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julian’s, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The use of deep learning algorithms has resulted in significant progress in automatic speech recognition (ASR). Robust high-accuracy ASR models typically require thousands or tens of thousands of hours of speech data, but even the strongest models tend fail under noisy conditions. Unsurprisingly, the impact of noise on accuracy is more drastic in low-resource settings. In this paper, we investigate the impact of noise on ASR in a low-resource setting. We explore novel methods for developing noise-robust ASR models using a a small dataset for Tamil, a widely-spoken but under-resourced Dravidian languages. We add various noises to the audio data to determine the impact of different kinds of noise (e.g., punctuated vs. constant, man-made vs natural) We also explore the relationship between different data augmentation methods are better suited to handling different types of noise. Our results show that all noises, regardless of the type, had an impact on ASR performance, and that upgrading the architecture alone could not mitigate the impact of noise. SpecAugment, the most common data augmentation method, was not as helpful as raw data augmentation, in which noise is explicitly added to the training data. Raw data augmentation enhances ASR performance on both clean data and noise-mixed data.</abstract>
<identifier type="citekey">lakshminarayanan-prudhommeaux-2024-exploring</identifier>
<location>
<url>https://aclanthology.org/2024.dravidianlangtech-1.5</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>30</start>
<end>34</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring the impact of noise in low-resource ASR for Tamil
%A Lakshminarayanan, Vigneshwar
%A Prud’hommeaux, Emily
%Y Chakravarthi, Bharathi Raja
%Y Priyadharshini, Ruba
%Y Madasamy, Anand Kumar
%Y Thavareesan, Sajeetha
%Y Sherly, Elizabeth
%Y Nadarajan, Rajeswari
%Y Ravikiran, Manikandan
%S Proceedings of the Fourth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F lakshminarayanan-prudhommeaux-2024-exploring
%X The use of deep learning algorithms has resulted in significant progress in automatic speech recognition (ASR). Robust high-accuracy ASR models typically require thousands or tens of thousands of hours of speech data, but even the strongest models tend fail under noisy conditions. Unsurprisingly, the impact of noise on accuracy is more drastic in low-resource settings. In this paper, we investigate the impact of noise on ASR in a low-resource setting. We explore novel methods for developing noise-robust ASR models using a a small dataset for Tamil, a widely-spoken but under-resourced Dravidian languages. We add various noises to the audio data to determine the impact of different kinds of noise (e.g., punctuated vs. constant, man-made vs natural) We also explore the relationship between different data augmentation methods are better suited to handling different types of noise. Our results show that all noises, regardless of the type, had an impact on ASR performance, and that upgrading the architecture alone could not mitigate the impact of noise. SpecAugment, the most common data augmentation method, was not as helpful as raw data augmentation, in which noise is explicitly added to the training data. Raw data augmentation enhances ASR performance on both clean data and noise-mixed data.
%U https://aclanthology.org/2024.dravidianlangtech-1.5
%P 30-34
Markdown (Informal)
[Exploring the impact of noise in low-resource ASR for Tamil](https://aclanthology.org/2024.dravidianlangtech-1.5) (Lakshminarayanan & Prud’hommeaux, DravidianLangTech-WS 2024)
ACL
- Vigneshwar Lakshminarayanan and Emily Prud’hommeaux. 2024. Exploring the impact of noise in low-resource ASR for Tamil. In Proceedings of the Fourth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages, pages 30–34, St. Julian's, Malta. Association for Computational Linguistics.