@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,
title = "Crowdsourcing {L}atin {A}merican {S}panish for Low-Resource Text-to-Speech",
author = "Guevara-Rukoz, Adriana and
Demirsahin, Isin and
He, Fei and
Chu, Shan-Hui Cathy and
Sarin, Supheakmungkol and
Pipatsrisawat, Knot and
Gutkin, Alexander and
Butryna, Alena and
Kjartansson, Oddur",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.801",
pages = "6504--6513",
abstract = "In this paper we present a multidialectal corpus approach for building a text-to-speech voice for a new dialect in a language with existing resources, focusing on various South American dialects of Spanish. We first present public speech datasets for Argentinian, Chilean, Colombian, Peruvian, Puerto Rican and Venezuelan Spanish specifically constructed with text-to-speech applications in mind using crowd-sourcing. We then compare the monodialectal voices built with minimal data to a multidialectal model built by pooling all the resources from all dialects. Our results show that the multidialectal model outperforms the monodialectal baseline models. We also experiment with a {``}zero-resource{''} dialect scenario where we build a multidialectal voice for a dialect while holding out target dialect recordings from the training data.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="guevara-rukoz-etal-2020-crowdsourcing">
<titleInfo>
<title>Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech</title>
</titleInfo>
<name type="personal">
<namePart type="given">Adriana</namePart>
<namePart type="family">Guevara-Rukoz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isin</namePart>
<namePart type="family">Demirsahin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fei</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shan-Hui</namePart>
<namePart type="given">Cathy</namePart>
<namePart type="family">Chu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Supheakmungkol</namePart>
<namePart type="family">Sarin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Knot</namePart>
<namePart type="family">Pipatsrisawat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Gutkin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alena</namePart>
<namePart type="family">Butryna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oddur</namePart>
<namePart type="family">Kjartansson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>In this paper we present a multidialectal corpus approach for building a text-to-speech voice for a new dialect in a language with existing resources, focusing on various South American dialects of Spanish. We first present public speech datasets for Argentinian, Chilean, Colombian, Peruvian, Puerto Rican and Venezuelan Spanish specifically constructed with text-to-speech applications in mind using crowd-sourcing. We then compare the monodialectal voices built with minimal data to a multidialectal model built by pooling all the resources from all dialects. Our results show that the multidialectal model outperforms the monodialectal baseline models. We also experiment with a “zero-resource” dialect scenario where we build a multidialectal voice for a dialect while holding out target dialect recordings from the training data.</abstract>
<identifier type="citekey">guevara-rukoz-etal-2020-crowdsourcing</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.801</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>6504</start>
<end>6513</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech
%A Guevara-Rukoz, Adriana
%A Demirsahin, Isin
%A He, Fei
%A Chu, Shan-Hui Cathy
%A Sarin, Supheakmungkol
%A Pipatsrisawat, Knot
%A Gutkin, Alexander
%A Butryna, Alena
%A Kjartansson, Oddur
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F guevara-rukoz-etal-2020-crowdsourcing
%X In this paper we present a multidialectal corpus approach for building a text-to-speech voice for a new dialect in a language with existing resources, focusing on various South American dialects of Spanish. We first present public speech datasets for Argentinian, Chilean, Colombian, Peruvian, Puerto Rican and Venezuelan Spanish specifically constructed with text-to-speech applications in mind using crowd-sourcing. We then compare the monodialectal voices built with minimal data to a multidialectal model built by pooling all the resources from all dialects. Our results show that the multidialectal model outperforms the monodialectal baseline models. We also experiment with a “zero-resource” dialect scenario where we build a multidialectal voice for a dialect while holding out target dialect recordings from the training data.
%U https://aclanthology.org/2020.lrec-1.801
%P 6504-6513
Markdown (Informal)
[Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech](https://aclanthology.org/2020.lrec-1.801) (Guevara-Rukoz et al., LREC 2020)
ACL
- Adriana Guevara-Rukoz, Isin Demirsahin, Fei He, Shan-Hui Cathy Chu, Supheakmungkol Sarin, Knot Pipatsrisawat, Alexander Gutkin, Alena Butryna, and Oddur Kjartansson. 2020. Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech. In Proceedings of the Twelfth Language Resources and Evaluation Conference, pages 6504–6513, Marseille, France. European Language Resources Association.