@inproceedings{ritchie-etal-2020-data,
title = "Data-Driven Parametric Text Normalization: Rapidly Scaling Finite-State Transduction Verbalizers to New Languages",
author = "Ritchie, Sandy and
Mahon, Eoin and
Heiligenstein, Kim and
Bampounis, Nikos and
van Esch, Daan and
Schallhart, Christian and
Mortensen, Jonas and
Brard, Benoit",
editor = "Beermann, Dorothee and
Besacier, Laurent and
Sakti, Sakriani and
Soria, Claudia",
booktitle = "Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources association",
url = "https://aclanthology.org/2020.sltu-1.30/",
pages = "218--225",
language = "eng",
ISBN = "979-10-95546-35-1",
abstract = "This paper presents a methodology for rapidly generating FST-based verbalizers for ASR and TTS systems by efficiently sourcing language-specific data. We describe a questionnaire which collects the necessary data to bootstrap the number grammar induction system and parameterize the verbalizer templates described in Ritchie et al. (2019), and a machine-readable data store which allows the data collected through the questionnaire to be supplemented by additional data from other sources. This system allows us to rapidly scale technologies such as ASR and TTS to more languages, including low-resource languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ritchie-etal-2020-data">
<titleInfo>
<title>Data-Driven Parametric Text Normalization: Rapidly Scaling Finite-State Transduction Verbalizers to New Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sandy</namePart>
<namePart type="family">Ritchie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eoin</namePart>
<namePart type="family">Mahon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kim</namePart>
<namePart type="family">Heiligenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikos</namePart>
<namePart type="family">Bampounis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daan</namePart>
<namePart type="family">van Esch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Schallhart</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonas</namePart>
<namePart type="family">Mortensen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benoit</namePart>
<namePart type="family">Brard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dorothee</namePart>
<namePart type="family">Beermann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laurent</namePart>
<namePart type="family">Besacier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Soria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-35-1</identifier>
</relatedItem>
<abstract>This paper presents a methodology for rapidly generating FST-based verbalizers for ASR and TTS systems by efficiently sourcing language-specific data. We describe a questionnaire which collects the necessary data to bootstrap the number grammar induction system and parameterize the verbalizer templates described in Ritchie et al. (2019), and a machine-readable data store which allows the data collected through the questionnaire to be supplemented by additional data from other sources. This system allows us to rapidly scale technologies such as ASR and TTS to more languages, including low-resource languages.</abstract>
<identifier type="citekey">ritchie-etal-2020-data</identifier>
<location>
<url>https://aclanthology.org/2020.sltu-1.30/</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>218</start>
<end>225</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Data-Driven Parametric Text Normalization: Rapidly Scaling Finite-State Transduction Verbalizers to New Languages
%A Ritchie, Sandy
%A Mahon, Eoin
%A Heiligenstein, Kim
%A Bampounis, Nikos
%A van Esch, Daan
%A Schallhart, Christian
%A Mortensen, Jonas
%A Brard, Benoit
%Y Beermann, Dorothee
%Y Besacier, Laurent
%Y Sakti, Sakriani
%Y Soria, Claudia
%S Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)
%D 2020
%8 May
%I European Language Resources association
%C Marseille, France
%@ 979-10-95546-35-1
%G eng
%F ritchie-etal-2020-data
%X This paper presents a methodology for rapidly generating FST-based verbalizers for ASR and TTS systems by efficiently sourcing language-specific data. We describe a questionnaire which collects the necessary data to bootstrap the number grammar induction system and parameterize the verbalizer templates described in Ritchie et al. (2019), and a machine-readable data store which allows the data collected through the questionnaire to be supplemented by additional data from other sources. This system allows us to rapidly scale technologies such as ASR and TTS to more languages, including low-resource languages.
%U https://aclanthology.org/2020.sltu-1.30/
%P 218-225
Markdown (Informal)
[Data-Driven Parametric Text Normalization: Rapidly Scaling Finite-State Transduction Verbalizers to New Languages](https://aclanthology.org/2020.sltu-1.30/) (Ritchie et al., SLTU 2020)
ACL
- Sandy Ritchie, Eoin Mahon, Kim Heiligenstein, Nikos Bampounis, Daan van Esch, Christian Schallhart, Jonas Mortensen, and Benoit Brard. 2020. Data-Driven Parametric Text Normalization: Rapidly Scaling Finite-State Transduction Verbalizers to New Languages. In Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL), pages 218–225, Marseille, France. European Language Resources association.