@inproceedings{elneima-binkowski-2022-adversarial,
title = "Adversarial Text-to-Speech for low-resource languages",
author = "Elneima, Ashraf and
Bi{\'n}kowski, Miko{\l}aj",
editor = "Bouamor, Houda and
Al-Khalifa, Hend and
Darwish, Kareem and
Rambow, Owen and
Bougares, Fethi and
Abdelali, Ahmed and
Tomeh, Nadi and
Khalifa, Salam and
Zaghouani, Wajdi",
booktitle = "Proceedings of the Seventh Arabic Natural Language Processing Workshop (WANLP)",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.wanlp-1.8",
doi = "10.18653/v1/2022.wanlp-1.8",
pages = "76--84",
abstract = "In this paper we propose a new method for training adversarial text-to-speech (TTS) models for low-resource languages using auxiliary data. Specifically, we modify the MelGAN (Kumar et al., 2019) architecture to achieve better performance in Arabic speech generation, exploring multiple additional datasets and architectural choices, which involved extra discriminators designed to exploit high-frequency similarities between languages. In our evaluation, we used subjective human evaluation, MOS-Mean Opinion Score, and a novel quantitative metric, the Fr{\'e}chet Wav2Vec Distance, which we found to be well correlated with MOS. Both subjectively and quantitatively, our method outperformed the standard MelGAN model.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="elneima-binkowski-2022-adversarial">
<titleInfo>
<title>Adversarial Text-to-Speech for low-resource languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ashraf</namePart>
<namePart type="family">Elneima</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mikołaj</namePart>
<namePart type="family">Bińkowski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh Arabic Natural Language Processing Workshop (WANLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kareem</namePart>
<namePart type="family">Darwish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fethi</namePart>
<namePart type="family">Bougares</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Abdelali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadi</namePart>
<namePart type="family">Tomeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salam</namePart>
<namePart type="family">Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wajdi</namePart>
<namePart type="family">Zaghouani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we propose a new method for training adversarial text-to-speech (TTS) models for low-resource languages using auxiliary data. Specifically, we modify the MelGAN (Kumar et al., 2019) architecture to achieve better performance in Arabic speech generation, exploring multiple additional datasets and architectural choices, which involved extra discriminators designed to exploit high-frequency similarities between languages. In our evaluation, we used subjective human evaluation, MOS-Mean Opinion Score, and a novel quantitative metric, the Fréchet Wav2Vec Distance, which we found to be well correlated with MOS. Both subjectively and quantitatively, our method outperformed the standard MelGAN model.</abstract>
<identifier type="citekey">elneima-binkowski-2022-adversarial</identifier>
<identifier type="doi">10.18653/v1/2022.wanlp-1.8</identifier>
<location>
<url>https://aclanthology.org/2022.wanlp-1.8</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>76</start>
<end>84</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Adversarial Text-to-Speech for low-resource languages
%A Elneima, Ashraf
%A Bińkowski, Mikołaj
%Y Bouamor, Houda
%Y Al-Khalifa, Hend
%Y Darwish, Kareem
%Y Rambow, Owen
%Y Bougares, Fethi
%Y Abdelali, Ahmed
%Y Tomeh, Nadi
%Y Khalifa, Salam
%Y Zaghouani, Wajdi
%S Proceedings of the Seventh Arabic Natural Language Processing Workshop (WANLP)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F elneima-binkowski-2022-adversarial
%X In this paper we propose a new method for training adversarial text-to-speech (TTS) models for low-resource languages using auxiliary data. Specifically, we modify the MelGAN (Kumar et al., 2019) architecture to achieve better performance in Arabic speech generation, exploring multiple additional datasets and architectural choices, which involved extra discriminators designed to exploit high-frequency similarities between languages. In our evaluation, we used subjective human evaluation, MOS-Mean Opinion Score, and a novel quantitative metric, the Fréchet Wav2Vec Distance, which we found to be well correlated with MOS. Both subjectively and quantitatively, our method outperformed the standard MelGAN model.
%R 10.18653/v1/2022.wanlp-1.8
%U https://aclanthology.org/2022.wanlp-1.8
%U https://doi.org/10.18653/v1/2022.wanlp-1.8
%P 76-84
Markdown (Informal)
[Adversarial Text-to-Speech for low-resource languages](https://aclanthology.org/2022.wanlp-1.8) (Elneima & Bińkowski, WANLP 2022)
ACL
- Ashraf Elneima and Mikołaj Bińkowski. 2022. Adversarial Text-to-Speech for low-resource languages. In Proceedings of the Seventh Arabic Natural Language Processing Workshop (WANLP), pages 76–84, Abu Dhabi, United Arab Emirates (Hybrid). Association for Computational Linguistics.