@inproceedings{lancien-etal-2020-developing,
title = "Developing Resources for Automated Speech Processing of {Q}uebec {F}rench",
author = "Lancien, M{\'e}lanie and
C{\^o}t{\'e}, Marie-H{\'e}l{\`e}ne and
Bigi, Brigitte",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.655/",
pages = "5323--5328",
language = "eng",
ISBN = "979-10-95546-34-4",
abstract = "The analysis of the structure of speech nearly always rests on the alignment of the speech recording with a phonetic transcription. Nowadays several tools can perform this speech segmentation automatically. However, none of them allows the automatic segmentation of Quebec French (QF hereafter), the acoustics and phonotactics of QF differing widely from that of France French (FF hereafter). To adequately segment QF, features like diphthongization of long vowels and affrication of coronal stops have to be taken into account. Thus acoustic models for automatic segmentation must be trained on speech samples exhibiting those phenomena. Dictionaries and lexicons must also be adapted and integrate differences in lexical units and in the phonology of QF. This paper presents the development of linguistic resources to be included into SPPAS software tool in order to get Text normalization, Phonetization, Alignment and Syllabification. We adapted the existing French lexicon and developed a QF-specific pronunciation dictionary. We then created an acoustic model from the existing ones and adapted it with 5 minutes of manually time-aligned data. These new resources are all freely distributed with SPPAS version 2.7; they perform the full process of speech segmentation in Quebec French."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lancien-etal-2020-developing">
<titleInfo>
<title>Developing Resources for Automated Speech Processing of Quebec French</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mélanie</namePart>
<namePart type="family">Lancien</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Hélène</namePart>
<namePart type="family">Côté</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brigitte</namePart>
<namePart type="family">Bigi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>The analysis of the structure of speech nearly always rests on the alignment of the speech recording with a phonetic transcription. Nowadays several tools can perform this speech segmentation automatically. However, none of them allows the automatic segmentation of Quebec French (QF hereafter), the acoustics and phonotactics of QF differing widely from that of France French (FF hereafter). To adequately segment QF, features like diphthongization of long vowels and affrication of coronal stops have to be taken into account. Thus acoustic models for automatic segmentation must be trained on speech samples exhibiting those phenomena. Dictionaries and lexicons must also be adapted and integrate differences in lexical units and in the phonology of QF. This paper presents the development of linguistic resources to be included into SPPAS software tool in order to get Text normalization, Phonetization, Alignment and Syllabification. We adapted the existing French lexicon and developed a QF-specific pronunciation dictionary. We then created an acoustic model from the existing ones and adapted it with 5 minutes of manually time-aligned data. These new resources are all freely distributed with SPPAS version 2.7; they perform the full process of speech segmentation in Quebec French.</abstract>
<identifier type="citekey">lancien-etal-2020-developing</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.655/</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>5323</start>
<end>5328</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Developing Resources for Automated Speech Processing of Quebec French
%A Lancien, Mélanie
%A Côté, Marie-Hélène
%A Bigi, Brigitte
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G eng
%F lancien-etal-2020-developing
%X The analysis of the structure of speech nearly always rests on the alignment of the speech recording with a phonetic transcription. Nowadays several tools can perform this speech segmentation automatically. However, none of them allows the automatic segmentation of Quebec French (QF hereafter), the acoustics and phonotactics of QF differing widely from that of France French (FF hereafter). To adequately segment QF, features like diphthongization of long vowels and affrication of coronal stops have to be taken into account. Thus acoustic models for automatic segmentation must be trained on speech samples exhibiting those phenomena. Dictionaries and lexicons must also be adapted and integrate differences in lexical units and in the phonology of QF. This paper presents the development of linguistic resources to be included into SPPAS software tool in order to get Text normalization, Phonetization, Alignment and Syllabification. We adapted the existing French lexicon and developed a QF-specific pronunciation dictionary. We then created an acoustic model from the existing ones and adapted it with 5 minutes of manually time-aligned data. These new resources are all freely distributed with SPPAS version 2.7; they perform the full process of speech segmentation in Quebec French.
%U https://aclanthology.org/2020.lrec-1.655/
%P 5323-5328
Markdown (Informal)
[Developing Resources for Automated Speech Processing of Quebec French](https://aclanthology.org/2020.lrec-1.655/) (Lancien et al., LREC 2020)
ACL