@inproceedings{akhlaghi-etal-2022-using,
title = "Using the {LARA} Little Prince to compare human and {TTS} audio quality",
author = {Akhlaghi, Elham and
Au{\dh}unard{\'o}ttir, Ingibj{\"o}rg I{\dh}a and
B{\k{a}}czkowska, Anna and
B{\'e}di, Branislav and
Beedar, Hakeem and
Berthelsen, Harald and
Chua, Cathy and
Cucchiarin, Catia and
Habibi, Hanieh and
Horv{\'a}thov{\'a}, Ivana and
Ikeda, Junta and
Maizonniaux, Christ{\`e}le and
N{\'\i} Chiar{\'a}in, Neasa and
Raheb, Chadi and
Rayner, Manny and
Sloan, John and
Tsourakis, Nikos and
Yao, Chunlin},
booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.lrec-1.318",
pages = "2967--2975",
abstract = "A popular idea in Computer Assisted Language Learning (CALL) is to use multimodal annotated texts, with annotations typically including embedded audio and translations, to support L2 learning through reading. An important question is how to create good quality audio, which can be done either through human recording or by a Text-To-Speech (TTS) engine. We may reasonably expect TTS to be quicker and easier, but human to be of higher quality. Here, we report a study using the open source LARA platform and ten languages. Samples of audio totalling about five minutes, representing the same four passages taken from LARA versions of Saint-Exup{\`e}ry{'}s {``}Le petit prince{''}, were provided for each language in both human and TTS form; the passages were chosen to instantiate the 2x2 cross product of the conditions dialogue, not-dialogue and humour, not-humour. 251 subjects used a web form to compare human and TTS versions of each item and rate the voices as a whole. For the three languages where TTS did best, English, French and Irish, the evidence from this study and the previous one it extended suggest that TTS audio is now pedagogically adequate and roughly comparable with a non-professional human voice in terms of exemplifying correct pronunciation and prosody. It was however still judged substantially less natural and less pleasant to listen to. No clear evidence was found to support the hypothesis that dialogue and humour pose special problems for TTS. All data and software will be made freely available.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="akhlaghi-etal-2022-using">
<titleInfo>
<title>Using the LARA Little Prince to compare human and TTS audio quality</title>
</titleInfo>
<name type="personal">
<namePart type="given">Elham</namePart>
<namePart type="family">Akhlaghi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ingibjörg</namePart>
<namePart type="given">Iða</namePart>
<namePart type="family">Auðunardóttir</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Bączkowska</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Branislav</namePart>
<namePart type="family">Bédi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hakeem</namePart>
<namePart type="family">Beedar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harald</namePart>
<namePart type="family">Berthelsen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cathy</namePart>
<namePart type="family">Chua</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Catia</namePart>
<namePart type="family">Cucchiarin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hanieh</namePart>
<namePart type="family">Habibi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivana</namePart>
<namePart type="family">Horváthová</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junta</namePart>
<namePart type="family">Ikeda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christèle</namePart>
<namePart type="family">Maizonniaux</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Neasa</namePart>
<namePart type="family">Ní Chiaráin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chadi</namePart>
<namePart type="family">Raheb</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manny</namePart>
<namePart type="family">Rayner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Sloan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikos</namePart>
<namePart type="family">Tsourakis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chunlin</namePart>
<namePart type="family">Yao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Thirteenth Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A popular idea in Computer Assisted Language Learning (CALL) is to use multimodal annotated texts, with annotations typically including embedded audio and translations, to support L2 learning through reading. An important question is how to create good quality audio, which can be done either through human recording or by a Text-To-Speech (TTS) engine. We may reasonably expect TTS to be quicker and easier, but human to be of higher quality. Here, we report a study using the open source LARA platform and ten languages. Samples of audio totalling about five minutes, representing the same four passages taken from LARA versions of Saint-Exupèry’s “Le petit prince”, were provided for each language in both human and TTS form; the passages were chosen to instantiate the 2x2 cross product of the conditions dialogue, not-dialogue and humour, not-humour. 251 subjects used a web form to compare human and TTS versions of each item and rate the voices as a whole. For the three languages where TTS did best, English, French and Irish, the evidence from this study and the previous one it extended suggest that TTS audio is now pedagogically adequate and roughly comparable with a non-professional human voice in terms of exemplifying correct pronunciation and prosody. It was however still judged substantially less natural and less pleasant to listen to. No clear evidence was found to support the hypothesis that dialogue and humour pose special problems for TTS. All data and software will be made freely available.</abstract>
<identifier type="citekey">akhlaghi-etal-2022-using</identifier>
<location>
<url>https://aclanthology.org/2022.lrec-1.318</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>2967</start>
<end>2975</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Using the LARA Little Prince to compare human and TTS audio quality
%A Akhlaghi, Elham
%A Auðunardóttir, Ingibjörg Iða
%A Bączkowska, Anna
%A Bédi, Branislav
%A Beedar, Hakeem
%A Berthelsen, Harald
%A Chua, Cathy
%A Cucchiarin, Catia
%A Habibi, Hanieh
%A Horváthová, Ivana
%A Ikeda, Junta
%A Maizonniaux, Christèle
%A Ní Chiaráin, Neasa
%A Raheb, Chadi
%A Rayner, Manny
%A Sloan, John
%A Tsourakis, Nikos
%A Yao, Chunlin
%S Proceedings of the Thirteenth Language Resources and Evaluation Conference
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F akhlaghi-etal-2022-using
%X A popular idea in Computer Assisted Language Learning (CALL) is to use multimodal annotated texts, with annotations typically including embedded audio and translations, to support L2 learning through reading. An important question is how to create good quality audio, which can be done either through human recording or by a Text-To-Speech (TTS) engine. We may reasonably expect TTS to be quicker and easier, but human to be of higher quality. Here, we report a study using the open source LARA platform and ten languages. Samples of audio totalling about five minutes, representing the same four passages taken from LARA versions of Saint-Exupèry’s “Le petit prince”, were provided for each language in both human and TTS form; the passages were chosen to instantiate the 2x2 cross product of the conditions dialogue, not-dialogue and humour, not-humour. 251 subjects used a web form to compare human and TTS versions of each item and rate the voices as a whole. For the three languages where TTS did best, English, French and Irish, the evidence from this study and the previous one it extended suggest that TTS audio is now pedagogically adequate and roughly comparable with a non-professional human voice in terms of exemplifying correct pronunciation and prosody. It was however still judged substantially less natural and less pleasant to listen to. No clear evidence was found to support the hypothesis that dialogue and humour pose special problems for TTS. All data and software will be made freely available.
%U https://aclanthology.org/2022.lrec-1.318
%P 2967-2975
Markdown (Informal)
[Using the LARA Little Prince to compare human and TTS audio quality](https://aclanthology.org/2022.lrec-1.318) (Akhlaghi et al., LREC 2022)
ACL
- Elham Akhlaghi, Ingibjörg Iða Auðunardóttir, Anna Bączkowska, Branislav Bédi, Hakeem Beedar, Harald Berthelsen, Cathy Chua, Catia Cucchiarin, Hanieh Habibi, Ivana Horváthová, Junta Ikeda, Christèle Maizonniaux, Neasa Ní Chiaráin, Chadi Raheb, Manny Rayner, John Sloan, Nikos Tsourakis, and Chunlin Yao. 2022. Using the LARA Little Prince to compare human and TTS audio quality. In Proceedings of the Thirteenth Language Resources and Evaluation Conference, pages 2967–2975, Marseille, France. European Language Resources Association.