@inproceedings{boruta-jastrzebska-2012-phonemic,
title = "A Phonemic Corpus of {P}olish Child-Directed Speech",
author = "Boruta, Luc and
Jastrzebska, Justyna",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Do{\u{g}}an, Mehmet U{\u{g}}ur and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)",
month = may,
year = "2012",
address = "Istanbul, Turkey",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2012/pdf/1120_Paper.pdf",
pages = "1017--1020",
abstract = "Recent advances in modeling early language acquisition are due not only to the development of machine-learning techniques, but also to the increasing availability of data on child language and child-adult interaction. In the absence of recordings of child-directed speech, or when models explicitly require such a representation for training data, phonemic transcriptions are commonly used as input data. We present a novel (and to our knowledge, the first) phonemic corpus of Polish child-directed speech. It is derived from the Weist corpus of Polish, freely available from the seminal CHILDES database. For the sake of reproducibility, and to exemplify the typical trade-off between ecological validity and sample size, we report all preprocessing operations and transcription guidelines. Contributed linguistic resources include updated CHAT-formatted transcripts with phonemic transcriptions in a novel phonology tier, as well as by-product data, such as a phonemic lexicon of Polish. All resources are distributed under the LGPL-LR license.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="boruta-jastrzebska-2012-phonemic">
<titleInfo>
<title>A Phonemic Corpus of Polish Child-Directed Speech</title>
</titleInfo>
<name type="personal">
<namePart type="given">Luc</namePart>
<namePart type="family">Boruta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Justyna</namePart>
<namePart type="family">Jastrzebska</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mehmet</namePart>
<namePart type="given">Uğur</namePart>
<namePart type="family">Doğan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Istanbul, Turkey</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recent advances in modeling early language acquisition are due not only to the development of machine-learning techniques, but also to the increasing availability of data on child language and child-adult interaction. In the absence of recordings of child-directed speech, or when models explicitly require such a representation for training data, phonemic transcriptions are commonly used as input data. We present a novel (and to our knowledge, the first) phonemic corpus of Polish child-directed speech. It is derived from the Weist corpus of Polish, freely available from the seminal CHILDES database. For the sake of reproducibility, and to exemplify the typical trade-off between ecological validity and sample size, we report all preprocessing operations and transcription guidelines. Contributed linguistic resources include updated CHAT-formatted transcripts with phonemic transcriptions in a novel phonology tier, as well as by-product data, such as a phonemic lexicon of Polish. All resources are distributed under the LGPL-LR license.</abstract>
<identifier type="citekey">boruta-jastrzebska-2012-phonemic</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/1120_Paper.pdf</url>
</location>
<part>
<date>2012-05</date>
<extent unit="page">
<start>1017</start>
<end>1020</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Phonemic Corpus of Polish Child-Directed Speech
%A Boruta, Luc
%A Jastrzebska, Justyna
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Doğan, Mehmet Uğur
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 May
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F boruta-jastrzebska-2012-phonemic
%X Recent advances in modeling early language acquisition are due not only to the development of machine-learning techniques, but also to the increasing availability of data on child language and child-adult interaction. In the absence of recordings of child-directed speech, or when models explicitly require such a representation for training data, phonemic transcriptions are commonly used as input data. We present a novel (and to our knowledge, the first) phonemic corpus of Polish child-directed speech. It is derived from the Weist corpus of Polish, freely available from the seminal CHILDES database. For the sake of reproducibility, and to exemplify the typical trade-off between ecological validity and sample size, we report all preprocessing operations and transcription guidelines. Contributed linguistic resources include updated CHAT-formatted transcripts with phonemic transcriptions in a novel phonology tier, as well as by-product data, such as a phonemic lexicon of Polish. All resources are distributed under the LGPL-LR license.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/1120_Paper.pdf
%P 1017-1020
Markdown (Informal)
[A Phonemic Corpus of Polish Child-Directed Speech](http://www.lrec-conf.org/proceedings/lrec2012/pdf/1120_Paper.pdf) (Boruta & Jastrzebska, LREC 2012)
ACL
- Luc Boruta and Justyna Jastrzebska. 2012. A Phonemic Corpus of Polish Child-Directed Speech. In Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12), pages 1017–1020, Istanbul, Turkey. European Language Resources Association (ELRA).