@inproceedings{meelen-etal-2024-end,
title = "End-to-End Speech Recognition for Endangered Languages of {N}epal",
author = "Meelen, Marieke and
O{'}Neill, Alexander and
Coto-Solano, Rolando",
editor = "Moeller, Sarah and
Agyapong, Godfred and
Arppe, Antti and
Chaudhary, Aditi and
Rijhwani, Shruti and
Cox, Christopher and
Henke, Ryan and
Palmer, Alexis and
Rosenblum, Daisy and
Schwartz, Lane",
booktitle = "Proceedings of the Seventh Workshop on the Use of Computational Methods in the Study of Endangered Languages",
month = mar,
year = "2024",
address = "St. Julians, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.computel-1.12/",
pages = "83--93",
abstract = "This paper presents three experiments to test the most effective and efficient ASR pipeline to facilitate the documentation and preservation of endangered languages, which are often extremely low-resourced. With data from two languages in Nepal {---}Dzardzongke and Newar{---} we show that model improvements are different for different masses of data, and that transfer learning as well as a range of modifications (e.g. normalising amplitude and pitch) can be effective, but that a consistently-standardised orthography as NLP input and post-training dictionary corrections improve results even more."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="meelen-etal-2024-end">
<titleInfo>
<title>End-to-End Speech Recognition for Endangered Languages of Nepal</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marieke</namePart>
<namePart type="family">Meelen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">O’Neill</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rolando</namePart>
<namePart type="family">Coto-Solano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh Workshop on the Use of Computational Methods in the Study of Endangered Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sarah</namePart>
<namePart type="family">Moeller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Godfred</namePart>
<namePart type="family">Agyapong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antti</namePart>
<namePart type="family">Arppe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aditi</namePart>
<namePart type="family">Chaudhary</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shruti</namePart>
<namePart type="family">Rijhwani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cox</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Henke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daisy</namePart>
<namePart type="family">Rosenblum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lane</namePart>
<namePart type="family">Schwartz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julians, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents three experiments to test the most effective and efficient ASR pipeline to facilitate the documentation and preservation of endangered languages, which are often extremely low-resourced. With data from two languages in Nepal —Dzardzongke and Newar— we show that model improvements are different for different masses of data, and that transfer learning as well as a range of modifications (e.g. normalising amplitude and pitch) can be effective, but that a consistently-standardised orthography as NLP input and post-training dictionary corrections improve results even more.</abstract>
<identifier type="citekey">meelen-etal-2024-end</identifier>
<location>
<url>https://aclanthology.org/2024.computel-1.12/</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>83</start>
<end>93</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T End-to-End Speech Recognition for Endangered Languages of Nepal
%A Meelen, Marieke
%A O’Neill, Alexander
%A Coto-Solano, Rolando
%Y Moeller, Sarah
%Y Agyapong, Godfred
%Y Arppe, Antti
%Y Chaudhary, Aditi
%Y Rijhwani, Shruti
%Y Cox, Christopher
%Y Henke, Ryan
%Y Palmer, Alexis
%Y Rosenblum, Daisy
%Y Schwartz, Lane
%S Proceedings of the Seventh Workshop on the Use of Computational Methods in the Study of Endangered Languages
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julians, Malta
%F meelen-etal-2024-end
%X This paper presents three experiments to test the most effective and efficient ASR pipeline to facilitate the documentation and preservation of endangered languages, which are often extremely low-resourced. With data from two languages in Nepal —Dzardzongke and Newar— we show that model improvements are different for different masses of data, and that transfer learning as well as a range of modifications (e.g. normalising amplitude and pitch) can be effective, but that a consistently-standardised orthography as NLP input and post-training dictionary corrections improve results even more.
%U https://aclanthology.org/2024.computel-1.12/
%P 83-93
Markdown (Informal)
[End-to-End Speech Recognition for Endangered Languages of Nepal](https://aclanthology.org/2024.computel-1.12/) (Meelen et al., ComputEL 2024)
ACL
- Marieke Meelen, Alexander O’Neill, and Rolando Coto-Solano. 2024. End-to-End Speech Recognition for Endangered Languages of Nepal. In Proceedings of the Seventh Workshop on the Use of Computational Methods in the Study of Endangered Languages, pages 83–93, St. Julians, Malta. Association for Computational Linguistics.