@inproceedings{marasek-gubrynowicz-2008-design,
title = "Design and Data Collection for Spoken {P}olish Dialogs Database",
author = "Marasek, Krzysztof and
Gubrynowicz, Ryszard",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Tapias, Daniel",
booktitle = "Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)",
month = may,
year = "2008",
address = "Marrakech, Morocco",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2008/pdf/57_paper.pdf",
abstract = "Spoken corpora provide a critical resource for research, development and evaluation of spoken dialog systems. This paper describes the telephone spoken dialog corpus for Polish created by Polish-Japanese Institute of Information Technology team within the LUNA project (IST 033549). The main goal of this project is to create a robust natural spoken language understanding (SLU) toolkit, which can be used to improve the speech-enabled telecom services in multilingual context (Italian, French and Polish). The corpus has been collected at the call center of Warsaw Transport Authority, manually transcribed and richly annotated on acoustic, syntactic and semantic levels. The most frequent users requests concern city traffic information (public transportation stops, routes, schedules, trip planning etc.). The collected database consists of two parts: 500 human-human dialogs of approx. 670 minutes long with a vocabulary of ca. 8,000 words and 500 human-machine dialogs recorded via the use of Wizard-of-Oz paradigm. The syntactic and semantic annotation is carried out by another team (Mykowiecka et al., 2007). This database is the first one collected for spontaneous Polish speech recorded through telecommunication lines and will be used for development and evaluation of automatic speech recognition (ASR) and robust natural spoken language understanding (SLU) components.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="marasek-gubrynowicz-2008-design">
<titleInfo>
<title>Design and Data Collection for Spoken Polish Dialogs Database</title>
</titleInfo>
<name type="personal">
<namePart type="given">Krzysztof</namePart>
<namePart type="family">Marasek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryszard</namePart>
<namePart type="family">Gubrynowicz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2008-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marrakech, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Spoken corpora provide a critical resource for research, development and evaluation of spoken dialog systems. This paper describes the telephone spoken dialog corpus for Polish created by Polish-Japanese Institute of Information Technology team within the LUNA project (IST 033549). The main goal of this project is to create a robust natural spoken language understanding (SLU) toolkit, which can be used to improve the speech-enabled telecom services in multilingual context (Italian, French and Polish). The corpus has been collected at the call center of Warsaw Transport Authority, manually transcribed and richly annotated on acoustic, syntactic and semantic levels. The most frequent users requests concern city traffic information (public transportation stops, routes, schedules, trip planning etc.). The collected database consists of two parts: 500 human-human dialogs of approx. 670 minutes long with a vocabulary of ca. 8,000 words and 500 human-machine dialogs recorded via the use of Wizard-of-Oz paradigm. The syntactic and semantic annotation is carried out by another team (Mykowiecka et al., 2007). This database is the first one collected for spontaneous Polish speech recorded through telecommunication lines and will be used for development and evaluation of automatic speech recognition (ASR) and robust natural spoken language understanding (SLU) components.</abstract>
<identifier type="citekey">marasek-gubrynowicz-2008-design</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/57_paper.pdf</url>
</location>
<part>
<date>2008-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Design and Data Collection for Spoken Polish Dialogs Database
%A Marasek, Krzysztof
%A Gubrynowicz, Ryszard
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Tapias, Daniel
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)
%D 2008
%8 May
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F marasek-gubrynowicz-2008-design
%X Spoken corpora provide a critical resource for research, development and evaluation of spoken dialog systems. This paper describes the telephone spoken dialog corpus for Polish created by Polish-Japanese Institute of Information Technology team within the LUNA project (IST 033549). The main goal of this project is to create a robust natural spoken language understanding (SLU) toolkit, which can be used to improve the speech-enabled telecom services in multilingual context (Italian, French and Polish). The corpus has been collected at the call center of Warsaw Transport Authority, manually transcribed and richly annotated on acoustic, syntactic and semantic levels. The most frequent users requests concern city traffic information (public transportation stops, routes, schedules, trip planning etc.). The collected database consists of two parts: 500 human-human dialogs of approx. 670 minutes long with a vocabulary of ca. 8,000 words and 500 human-machine dialogs recorded via the use of Wizard-of-Oz paradigm. The syntactic and semantic annotation is carried out by another team (Mykowiecka et al., 2007). This database is the first one collected for spontaneous Polish speech recorded through telecommunication lines and will be used for development and evaluation of automatic speech recognition (ASR) and robust natural spoken language understanding (SLU) components.
%U http://www.lrec-conf.org/proceedings/lrec2008/pdf/57_paper.pdf
Markdown (Informal)
[Design and Data Collection for Spoken Polish Dialogs Database](http://www.lrec-conf.org/proceedings/lrec2008/pdf/57_paper.pdf) (Marasek & Gubrynowicz, LREC 2008)
ACL