@inproceedings{magistry-etal-2024-experiments,
title = "Experiments on Speech Synthesis for Teochew, Can {T}aiwanese Help ?",
author = "Magistry, Pierre and
Wang, Ilaine and
Lim, Ty Eng",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.598",
pages = "6849--6854",
abstract = "This paper reports on our preliminary experiments in speech processing for Teochew, an under-resourced Sinitic language spoken both in China and around the world in diasporan communities. Following the recent uptick of interest in Teochew from heritage speakers of the diaspora and in order to respond to the needs of this community, we develop a Teochew Text-to-Speech system. We describe experiments to build this system and to assess the possible contribution of available resources in Taiwanese Hokkien, the closest language with a significant body of resources. The results of these experiments are not as conclusive as we expected: the Taiwanese dataset did not help our model significantly, but considering our objectives, we find it encouraging that they show that a large training dataset was not necessary for this precise task. A promising model could still be obtained with only a small dataset of Teochew. We hope that this work inspires other communities of speakers of languages in a revitalization phase.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="magistry-etal-2024-experiments">
<titleInfo>
<title>Experiments on Speech Synthesis for Teochew, Can Taiwanese Help ?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pierre</namePart>
<namePart type="family">Magistry</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ilaine</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ty</namePart>
<namePart type="given">Eng</namePart>
<namePart type="family">Lim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper reports on our preliminary experiments in speech processing for Teochew, an under-resourced Sinitic language spoken both in China and around the world in diasporan communities. Following the recent uptick of interest in Teochew from heritage speakers of the diaspora and in order to respond to the needs of this community, we develop a Teochew Text-to-Speech system. We describe experiments to build this system and to assess the possible contribution of available resources in Taiwanese Hokkien, the closest language with a significant body of resources. The results of these experiments are not as conclusive as we expected: the Taiwanese dataset did not help our model significantly, but considering our objectives, we find it encouraging that they show that a large training dataset was not necessary for this precise task. A promising model could still be obtained with only a small dataset of Teochew. We hope that this work inspires other communities of speakers of languages in a revitalization phase.</abstract>
<identifier type="citekey">magistry-etal-2024-experiments</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.598</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>6849</start>
<end>6854</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Experiments on Speech Synthesis for Teochew, Can Taiwanese Help ?
%A Magistry, Pierre
%A Wang, Ilaine
%A Lim, Ty Eng
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F magistry-etal-2024-experiments
%X This paper reports on our preliminary experiments in speech processing for Teochew, an under-resourced Sinitic language spoken both in China and around the world in diasporan communities. Following the recent uptick of interest in Teochew from heritage speakers of the diaspora and in order to respond to the needs of this community, we develop a Teochew Text-to-Speech system. We describe experiments to build this system and to assess the possible contribution of available resources in Taiwanese Hokkien, the closest language with a significant body of resources. The results of these experiments are not as conclusive as we expected: the Taiwanese dataset did not help our model significantly, but considering our objectives, we find it encouraging that they show that a large training dataset was not necessary for this precise task. A promising model could still be obtained with only a small dataset of Teochew. We hope that this work inspires other communities of speakers of languages in a revitalization phase.
%U https://aclanthology.org/2024.lrec-main.598
%P 6849-6854
Markdown (Informal)
[Experiments on Speech Synthesis for Teochew, Can Taiwanese Help ?](https://aclanthology.org/2024.lrec-main.598) (Magistry et al., LREC-COLING 2024)
ACL