@inproceedings{sainz-etal-2010-tts,
title = "{TTS} Evaluation Campaign with a Common {S}panish Database",
author = "Sainz, I{\~n}aki and
Navas, Eva and
Hern{\'a}ez, Inma and
Bonafonte, Antonio and
Campillo, Francisco",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Rosner, Mike and
Tapias, Daniel",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2010/pdf/456_Paper.pdf",
abstract = "This paper describes the first TTS evaluation campaign designed for Spanish. Seven research institutions took part in the evaluation campaign and developed a voice from a common speech database provided by the organisation. Each participating team had a period of seven weeks to generate a voice. Next, a set of sentences were released and each team had to synthesise them within a week period. Finally, some of the synthesised test audio files were subjectively evaluated via an online test according to the following criteria: similarity to the original voice, naturalness and intelligibility. Box-plots, Wilcoxon tests and WER have been generated in order to analyse the results. Two main conclusions can be drawn: On the one hand, there is considerable margin for improvement to reach the quality level of the natural voice. On the other hand, two systems get significantly better results than the rest: one is based on statistical parametric synthesis and the other one is a concatenative system that makes use of a sinusoidal model to modify both prosody and smooth spectral joints. Therefore, it seems that some kind of spectral control is needed when building voices with a medium size database for unrestricted domains.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sainz-etal-2010-tts">
<titleInfo>
<title>TTS Evaluation Campaign with a Common Spanish Database</title>
</titleInfo>
<name type="personal">
<namePart type="given">Iñaki</namePart>
<namePart type="family">Sainz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eva</namePart>
<namePart type="family">Navas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Inma</namePart>
<namePart type="family">Hernáez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonio</namePart>
<namePart type="family">Bonafonte</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francisco</namePart>
<namePart type="family">Campillo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Rosner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the first TTS evaluation campaign designed for Spanish. Seven research institutions took part in the evaluation campaign and developed a voice from a common speech database provided by the organisation. Each participating team had a period of seven weeks to generate a voice. Next, a set of sentences were released and each team had to synthesise them within a week period. Finally, some of the synthesised test audio files were subjectively evaluated via an online test according to the following criteria: similarity to the original voice, naturalness and intelligibility. Box-plots, Wilcoxon tests and WER have been generated in order to analyse the results. Two main conclusions can be drawn: On the one hand, there is considerable margin for improvement to reach the quality level of the natural voice. On the other hand, two systems get significantly better results than the rest: one is based on statistical parametric synthesis and the other one is a concatenative system that makes use of a sinusoidal model to modify both prosody and smooth spectral joints. Therefore, it seems that some kind of spectral control is needed when building voices with a medium size database for unrestricted domains.</abstract>
<identifier type="citekey">sainz-etal-2010-tts</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/456_Paper.pdf</url>
</location>
<part>
<date>2010-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TTS Evaluation Campaign with a Common Spanish Database
%A Sainz, Iñaki
%A Navas, Eva
%A Hernáez, Inma
%A Bonafonte, Antonio
%A Campillo, Francisco
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F sainz-etal-2010-tts
%X This paper describes the first TTS evaluation campaign designed for Spanish. Seven research institutions took part in the evaluation campaign and developed a voice from a common speech database provided by the organisation. Each participating team had a period of seven weeks to generate a voice. Next, a set of sentences were released and each team had to synthesise them within a week period. Finally, some of the synthesised test audio files were subjectively evaluated via an online test according to the following criteria: similarity to the original voice, naturalness and intelligibility. Box-plots, Wilcoxon tests and WER have been generated in order to analyse the results. Two main conclusions can be drawn: On the one hand, there is considerable margin for improvement to reach the quality level of the natural voice. On the other hand, two systems get significantly better results than the rest: one is based on statistical parametric synthesis and the other one is a concatenative system that makes use of a sinusoidal model to modify both prosody and smooth spectral joints. Therefore, it seems that some kind of spectral control is needed when building voices with a medium size database for unrestricted domains.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/456_Paper.pdf
Markdown (Informal)
[TTS Evaluation Campaign with a Common Spanish Database](http://www.lrec-conf.org/proceedings/lrec2010/pdf/456_Paper.pdf) (Sainz et al., LREC 2010)
ACL
- Iñaki Sainz, Eva Navas, Inma Hernáez, Antonio Bonafonte, and Francisco Campillo. 2010. TTS Evaluation Campaign with a Common Spanish Database. In Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC'10), Valletta, Malta. European Language Resources Association (ELRA).