@inproceedings{cucchiarini-etal-2006-jasmin,
title = "{JASMIN}-{CGN}: Extension of the Spoken {D}utch Corpus with Speech of Elderly People, Children and Non-natives in the Human-Machine Interaction Modality",
author = "Cucchiarini, Catia and
Van hamme, Hugo and
van Herwijnen, Olga and
Smits, Felix",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Gangemi, Aldo and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Tapias, Daniel",
booktitle = "Proceedings of the Fifth International Conference on Language Resources and Evaluation ({LREC}{'}06)",
month = may,
year = "2006",
address = "Genoa, Italy",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2006/pdf/254_pdf.pdf",
abstract = "Large speech corpora (LSC) constitute an indispensable resource for conducting research in speech processing and for developing real-life speech applications. In 2004 the Spoken Dutch Corpus (CGN) became available, a corpus of standard Dutch as spoken by adult natives in the Netherlands and Flanders. Owing to budget constraints, CGN does not include speech of children, non-natives, elderly people and recordings of speech produced in human-machine interactions. Since such recordings would be extremely useful for conducting research and for developing HLT applications for these specific groups of speakers of Dutch, a new project, JASMIN-CGN, was started which aims at extending CGN in different ways: by collecting a corpus of contemporary Dutch as spoken by children of different age groups, non-natives with different mother tongues and elderly people in the Netherlands and Flanders and, in addition, by collecting speech material in a communication setting that was not envisaged in CGN: human-machine interaction. We expect that the knowledge gathered from these data can be generalized to developing appropriate systems also for other speaker groups (i.e. adult natives). One third of the data will be collected in Flanders and two thirds in the Netherlands.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cucchiarini-etal-2006-jasmin">
<titleInfo>
<title>JASMIN-CGN: Extension of the Spoken Dutch Corpus with Speech of Elderly People, Children and Non-natives in the Human-Machine Interaction Modality</title>
</titleInfo>
<name type="personal">
<namePart type="given">Catia</namePart>
<namePart type="family">Cucchiarini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hugo</namePart>
<namePart type="family">Van hamme</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Olga</namePart>
<namePart type="family">van Herwijnen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Felix</namePart>
<namePart type="family">Smits</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2006-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aldo</namePart>
<namePart type="family">Gangemi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Genoa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large speech corpora (LSC) constitute an indispensable resource for conducting research in speech processing and for developing real-life speech applications. In 2004 the Spoken Dutch Corpus (CGN) became available, a corpus of standard Dutch as spoken by adult natives in the Netherlands and Flanders. Owing to budget constraints, CGN does not include speech of children, non-natives, elderly people and recordings of speech produced in human-machine interactions. Since such recordings would be extremely useful for conducting research and for developing HLT applications for these specific groups of speakers of Dutch, a new project, JASMIN-CGN, was started which aims at extending CGN in different ways: by collecting a corpus of contemporary Dutch as spoken by children of different age groups, non-natives with different mother tongues and elderly people in the Netherlands and Flanders and, in addition, by collecting speech material in a communication setting that was not envisaged in CGN: human-machine interaction. We expect that the knowledge gathered from these data can be generalized to developing appropriate systems also for other speaker groups (i.e. adult natives). One third of the data will be collected in Flanders and two thirds in the Netherlands.</abstract>
<identifier type="citekey">cucchiarini-etal-2006-jasmin</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/254_pdf.pdf</url>
</location>
<part>
<date>2006-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T JASMIN-CGN: Extension of the Spoken Dutch Corpus with Speech of Elderly People, Children and Non-natives in the Human-Machine Interaction Modality
%A Cucchiarini, Catia
%A Van hamme, Hugo
%A van Herwijnen, Olga
%A Smits, Felix
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Gangemi, Aldo
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Tapias, Daniel
%S Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)
%D 2006
%8 May
%I European Language Resources Association (ELRA)
%C Genoa, Italy
%F cucchiarini-etal-2006-jasmin
%X Large speech corpora (LSC) constitute an indispensable resource for conducting research in speech processing and for developing real-life speech applications. In 2004 the Spoken Dutch Corpus (CGN) became available, a corpus of standard Dutch as spoken by adult natives in the Netherlands and Flanders. Owing to budget constraints, CGN does not include speech of children, non-natives, elderly people and recordings of speech produced in human-machine interactions. Since such recordings would be extremely useful for conducting research and for developing HLT applications for these specific groups of speakers of Dutch, a new project, JASMIN-CGN, was started which aims at extending CGN in different ways: by collecting a corpus of contemporary Dutch as spoken by children of different age groups, non-natives with different mother tongues and elderly people in the Netherlands and Flanders and, in addition, by collecting speech material in a communication setting that was not envisaged in CGN: human-machine interaction. We expect that the knowledge gathered from these data can be generalized to developing appropriate systems also for other speaker groups (i.e. adult natives). One third of the data will be collected in Flanders and two thirds in the Netherlands.
%U http://www.lrec-conf.org/proceedings/lrec2006/pdf/254_pdf.pdf
Markdown (Informal)
[JASMIN-CGN: Extension of the Spoken Dutch Corpus with Speech of Elderly People, Children and Non-natives in the Human-Machine Interaction Modality](http://www.lrec-conf.org/proceedings/lrec2006/pdf/254_pdf.pdf) (Cucchiarini et al., LREC 2006)
ACL