@inproceedings{fitzgerald-jelinek-2008-linguistic,
title = "Linguistic Resources for Reconstructing Spontaneous Speech Text",
author = "Fitzgerald, Erin and
Jelinek, Frederick",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Tapias, Daniel",
booktitle = "Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)",
month = may,
year = "2008",
address = "Marrakech, Morocco",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2008/pdf/874_paper.pdf",
abstract = "The output of a speech recognition system is not always ideal for subsequent downstream processing, in part because speakers themselves often make mistakes. A system would accomplish speech reconstruction of its spontaneous speech input if its output were to represent, in flawless, fluent, and content-preserving English, the message that the speaker intended to convey. These cleaner speech transcripts would allow for more accurate language processing as needed for NLP tasks such as machine translation and conversation summarization, which often rely on grammatical input. Recognizing that supervised statistical methods to identify and transform ill-formed areas of the transcript will require richly labeled resources, we have built the Spontaneous Speech Reconstruction corpus. This small corpus of reconstructed and aligned conversational telephone speech transcriptions for the Fisher conversational telephone speech corpus (Strassel and Walker, 2004) was annotated on several levels including string transformations and predicate-argument structure, and will be shared with the linguistic research community.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fitzgerald-jelinek-2008-linguistic">
<titleInfo>
<title>Linguistic Resources for Reconstructing Spontaneous Speech Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Erin</namePart>
<namePart type="family">Fitzgerald</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frederick</namePart>
<namePart type="family">Jelinek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2008-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marrakech, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The output of a speech recognition system is not always ideal for subsequent downstream processing, in part because speakers themselves often make mistakes. A system would accomplish speech reconstruction of its spontaneous speech input if its output were to represent, in flawless, fluent, and content-preserving English, the message that the speaker intended to convey. These cleaner speech transcripts would allow for more accurate language processing as needed for NLP tasks such as machine translation and conversation summarization, which often rely on grammatical input. Recognizing that supervised statistical methods to identify and transform ill-formed areas of the transcript will require richly labeled resources, we have built the Spontaneous Speech Reconstruction corpus. This small corpus of reconstructed and aligned conversational telephone speech transcriptions for the Fisher conversational telephone speech corpus (Strassel and Walker, 2004) was annotated on several levels including string transformations and predicate-argument structure, and will be shared with the linguistic research community.</abstract>
<identifier type="citekey">fitzgerald-jelinek-2008-linguistic</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/874_paper.pdf</url>
</location>
<part>
<date>2008-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Linguistic Resources for Reconstructing Spontaneous Speech Text
%A Fitzgerald, Erin
%A Jelinek, Frederick
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Tapias, Daniel
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)
%D 2008
%8 May
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F fitzgerald-jelinek-2008-linguistic
%X The output of a speech recognition system is not always ideal for subsequent downstream processing, in part because speakers themselves often make mistakes. A system would accomplish speech reconstruction of its spontaneous speech input if its output were to represent, in flawless, fluent, and content-preserving English, the message that the speaker intended to convey. These cleaner speech transcripts would allow for more accurate language processing as needed for NLP tasks such as machine translation and conversation summarization, which often rely on grammatical input. Recognizing that supervised statistical methods to identify and transform ill-formed areas of the transcript will require richly labeled resources, we have built the Spontaneous Speech Reconstruction corpus. This small corpus of reconstructed and aligned conversational telephone speech transcriptions for the Fisher conversational telephone speech corpus (Strassel and Walker, 2004) was annotated on several levels including string transformations and predicate-argument structure, and will be shared with the linguistic research community.
%U http://www.lrec-conf.org/proceedings/lrec2008/pdf/874_paper.pdf
Markdown (Informal)
[Linguistic Resources for Reconstructing Spontaneous Speech Text](http://www.lrec-conf.org/proceedings/lrec2008/pdf/874_paper.pdf) (Fitzgerald & Jelinek, LREC 2008)
ACL