@inproceedings{evans-etal-2022-developing,
title = "Developing Automatic Speech Recognition for {S}cottish {G}aelic",
author = "Evans, Lucy and
Lamb, William and
Sinclair, Mark and
Alex, Beatrice",
editor = "Fransen, Theodorus and
Lamb, William and
Prys, Delyth",
booktitle = "Proceedings of the 4th Celtic Language Technology Workshop within LREC2022",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.cltw-1.16",
pages = "110--120",
abstract = "This paper discusses our efforts to develop a full automatic speech recognition (ASR) system for Scottish Gaelic, starting from a point of limited resource. Building ASR technology is important for documenting and revitalising endangered languages; it enables existing resources to be enhanced with automatic subtitles and transcriptions, improves accessibility for users, and, in turn, encourages continued use of the language. In this paper, we explain the many difficulties faced when collecting minority language data for speech recognition. A novel cross-lingual approach to the alignment of training data is used to overcome one such difficulty, and in this way we demonstrate how majority language resources can bootstrap the development of lower-resourced language technology. We use the Kaldi speech recognition toolkit to develop several Gaelic ASR systems, and report a final WER of 26.30{\%}. This is a 9.50{\%} improvement on our original model.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="evans-etal-2022-developing">
<titleInfo>
<title>Developing Automatic Speech Recognition for Scottish Gaelic</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lucy</namePart>
<namePart type="family">Evans</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">William</namePart>
<namePart type="family">Lamb</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Sinclair</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Beatrice</namePart>
<namePart type="family">Alex</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Celtic Language Technology Workshop within LREC2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Theodorus</namePart>
<namePart type="family">Fransen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">William</namePart>
<namePart type="family">Lamb</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Delyth</namePart>
<namePart type="family">Prys</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper discusses our efforts to develop a full automatic speech recognition (ASR) system for Scottish Gaelic, starting from a point of limited resource. Building ASR technology is important for documenting and revitalising endangered languages; it enables existing resources to be enhanced with automatic subtitles and transcriptions, improves accessibility for users, and, in turn, encourages continued use of the language. In this paper, we explain the many difficulties faced when collecting minority language data for speech recognition. A novel cross-lingual approach to the alignment of training data is used to overcome one such difficulty, and in this way we demonstrate how majority language resources can bootstrap the development of lower-resourced language technology. We use the Kaldi speech recognition toolkit to develop several Gaelic ASR systems, and report a final WER of 26.30%. This is a 9.50% improvement on our original model.</abstract>
<identifier type="citekey">evans-etal-2022-developing</identifier>
<location>
<url>https://aclanthology.org/2022.cltw-1.16</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>110</start>
<end>120</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Developing Automatic Speech Recognition for Scottish Gaelic
%A Evans, Lucy
%A Lamb, William
%A Sinclair, Mark
%A Alex, Beatrice
%Y Fransen, Theodorus
%Y Lamb, William
%Y Prys, Delyth
%S Proceedings of the 4th Celtic Language Technology Workshop within LREC2022
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F evans-etal-2022-developing
%X This paper discusses our efforts to develop a full automatic speech recognition (ASR) system for Scottish Gaelic, starting from a point of limited resource. Building ASR technology is important for documenting and revitalising endangered languages; it enables existing resources to be enhanced with automatic subtitles and transcriptions, improves accessibility for users, and, in turn, encourages continued use of the language. In this paper, we explain the many difficulties faced when collecting minority language data for speech recognition. A novel cross-lingual approach to the alignment of training data is used to overcome one such difficulty, and in this way we demonstrate how majority language resources can bootstrap the development of lower-resourced language technology. We use the Kaldi speech recognition toolkit to develop several Gaelic ASR systems, and report a final WER of 26.30%. This is a 9.50% improvement on our original model.
%U https://aclanthology.org/2022.cltw-1.16
%P 110-120
Markdown (Informal)
[Developing Automatic Speech Recognition for Scottish Gaelic](https://aclanthology.org/2022.cltw-1.16) (Evans et al., CLTW 2022)
ACL