@inproceedings{desot-etal-2020-corpus,
title = "Corpus Generation for Voice Command in Smart Home and the Effect of Speech Synthesis on End-to-End {SLU}",
author = "Desot, Thierry and
Portet, Fran{\c{c}}ois and
Vacher, Michel",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.786",
pages = "6395--6404",
abstract = "Massive amounts of annotated data greatly contributed to the advance of the machine learning field. However such large data sets are often unavailable for novel tasks performed in realistic environments such as smart homes. In this domain, semantically annotated large voice command corpora for Spoken Language Understanding (SLU) are scarce, especially for non-English languages. We present the automatic generation process of a synthetic semantically-annotated corpus of French commands for smart-home to train pipeline and End-to-End (E2E) SLU models. SLU is typically performed through Automatic Speech Recognition (ASR) and Natural Language Understanding (NLU) in a pipeline. Since errors at the ASR stage reduce the NLU performance, an alternative approach is End-to-End (E2E) SLU to jointly perform ASR and NLU. To that end, the artificial corpus was fed to a text-to-speech (TTS) system to generate synthetic speech data. All models were evaluated on voice commands acquired in a real smart home. We show that artificial data can be combined with real data within the same training set or used as a stand-alone training corpus. The synthetic speech quality was assessedby comparing it to real data using dynamic time warping (DTW).",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="desot-etal-2020-corpus">
<titleInfo>
<title>Corpus Generation for Voice Command in Smart Home and the Effect of Speech Synthesis on End-to-End SLU</title>
</titleInfo>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Desot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">François</namePart>
<namePart type="family">Portet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michel</namePart>
<namePart type="family">Vacher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>Massive amounts of annotated data greatly contributed to the advance of the machine learning field. However such large data sets are often unavailable for novel tasks performed in realistic environments such as smart homes. In this domain, semantically annotated large voice command corpora for Spoken Language Understanding (SLU) are scarce, especially for non-English languages. We present the automatic generation process of a synthetic semantically-annotated corpus of French commands for smart-home to train pipeline and End-to-End (E2E) SLU models. SLU is typically performed through Automatic Speech Recognition (ASR) and Natural Language Understanding (NLU) in a pipeline. Since errors at the ASR stage reduce the NLU performance, an alternative approach is End-to-End (E2E) SLU to jointly perform ASR and NLU. To that end, the artificial corpus was fed to a text-to-speech (TTS) system to generate synthetic speech data. All models were evaluated on voice commands acquired in a real smart home. We show that artificial data can be combined with real data within the same training set or used as a stand-alone training corpus. The synthetic speech quality was assessedby comparing it to real data using dynamic time warping (DTW).</abstract>
<identifier type="citekey">desot-etal-2020-corpus</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.786</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>6395</start>
<end>6404</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Corpus Generation for Voice Command in Smart Home and the Effect of Speech Synthesis on End-to-End SLU
%A Desot, Thierry
%A Portet, François
%A Vacher, Michel
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F desot-etal-2020-corpus
%X Massive amounts of annotated data greatly contributed to the advance of the machine learning field. However such large data sets are often unavailable for novel tasks performed in realistic environments such as smart homes. In this domain, semantically annotated large voice command corpora for Spoken Language Understanding (SLU) are scarce, especially for non-English languages. We present the automatic generation process of a synthetic semantically-annotated corpus of French commands for smart-home to train pipeline and End-to-End (E2E) SLU models. SLU is typically performed through Automatic Speech Recognition (ASR) and Natural Language Understanding (NLU) in a pipeline. Since errors at the ASR stage reduce the NLU performance, an alternative approach is End-to-End (E2E) SLU to jointly perform ASR and NLU. To that end, the artificial corpus was fed to a text-to-speech (TTS) system to generate synthetic speech data. All models were evaluated on voice commands acquired in a real smart home. We show that artificial data can be combined with real data within the same training set or used as a stand-alone training corpus. The synthetic speech quality was assessedby comparing it to real data using dynamic time warping (DTW).
%U https://aclanthology.org/2020.lrec-1.786
%P 6395-6404
Markdown (Informal)
[Corpus Generation for Voice Command in Smart Home and the Effect of Speech Synthesis on End-to-End SLU](https://aclanthology.org/2020.lrec-1.786) (Desot et al., LREC 2020)
ACL