@inproceedings{chalamandaris-etal-2014-using,
title = "Using Audio Books for Training a Text-to-Speech System",
author = "Chalamandaris, Aimilios and
Tsiakoulis, Pirros and
Karabetsos, Sotiris and
Raptis, Spyros",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Loftsson, Hrafn and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
month = may,
year = "2014",
address = "Reykjavik, Iceland",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/838_Paper.pdf",
pages = "3076--3080",
abstract = "Creating new voices for a TTS system often requires a costly procedure of designing and recording an audio corpus, a time consuming and effort intensive task. Using publicly available audiobooks as the raw material of a spoken corpus for such systems creates new perspectives regarding the possibility of creating new synthetic voices quickly and with limited effort. This paper addresses the issue of creating new synthetic voices based on audiobook data in an automated method. As an audiobook includes several types of speech, such as narration, character playing etc., special care is given in identifying the data subset that leads to a more neutral and general purpose synthetic voice. The main goal is to identify and address the effect the audiobook speech diversity on the resulting TTS system. Along with the methodology for coping with this diversity in the speech data, we also describe a set of experiments performed in order to investigate the efficiency of different approaches for automatic data pruning. Further plans for exploiting the diversity of the speech incorporated in an audiobook are also described in the final section and conclusions are drawn.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chalamandaris-etal-2014-using">
<titleInfo>
<title>Using Audio Books for Training a Text-to-Speech System</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aimilios</namePart>
<namePart type="family">Chalamandaris</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pirros</namePart>
<namePart type="family">Tsiakoulis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sotiris</namePart>
<namePart type="family">Karabetsos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Spyros</namePart>
<namePart type="family">Raptis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Creating new voices for a TTS system often requires a costly procedure of designing and recording an audio corpus, a time consuming and effort intensive task. Using publicly available audiobooks as the raw material of a spoken corpus for such systems creates new perspectives regarding the possibility of creating new synthetic voices quickly and with limited effort. This paper addresses the issue of creating new synthetic voices based on audiobook data in an automated method. As an audiobook includes several types of speech, such as narration, character playing etc., special care is given in identifying the data subset that leads to a more neutral and general purpose synthetic voice. The main goal is to identify and address the effect the audiobook speech diversity on the resulting TTS system. Along with the methodology for coping with this diversity in the speech data, we also describe a set of experiments performed in order to investigate the efficiency of different approaches for automatic data pruning. Further plans for exploiting the diversity of the speech incorporated in an audiobook are also described in the final section and conclusions are drawn.</abstract>
<identifier type="citekey">chalamandaris-etal-2014-using</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/838_Paper.pdf</url>
</location>
<part>
<date>2014-05</date>
<extent unit="page">
<start>3076</start>
<end>3080</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Using Audio Books for Training a Text-to-Speech System
%A Chalamandaris, Aimilios
%A Tsiakoulis, Pirros
%A Karabetsos, Sotiris
%A Raptis, Spyros
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Loftsson, Hrafn
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 May
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F chalamandaris-etal-2014-using
%X Creating new voices for a TTS system often requires a costly procedure of designing and recording an audio corpus, a time consuming and effort intensive task. Using publicly available audiobooks as the raw material of a spoken corpus for such systems creates new perspectives regarding the possibility of creating new synthetic voices quickly and with limited effort. This paper addresses the issue of creating new synthetic voices based on audiobook data in an automated method. As an audiobook includes several types of speech, such as narration, character playing etc., special care is given in identifying the data subset that leads to a more neutral and general purpose synthetic voice. The main goal is to identify and address the effect the audiobook speech diversity on the resulting TTS system. Along with the methodology for coping with this diversity in the speech data, we also describe a set of experiments performed in order to investigate the efficiency of different approaches for automatic data pruning. Further plans for exploiting the diversity of the speech incorporated in an audiobook are also described in the final section and conclusions are drawn.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/838_Paper.pdf
%P 3076-3080
Markdown (Informal)
[Using Audio Books for Training a Text-to-Speech System](http://www.lrec-conf.org/proceedings/lrec2014/pdf/838_Paper.pdf) (Chalamandaris et al., LREC 2014)
ACL
- Aimilios Chalamandaris, Pirros Tsiakoulis, Sotiris Karabetsos, and Spyros Raptis. 2014. Using Audio Books for Training a Text-to-Speech System. In Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14), pages 3076–3080, Reykjavik, Iceland. European Language Resources Association (ELRA).