@inproceedings{barras-etal-2004-automatic,
title = "Automatic Audio and Manual Transcripts Alignment, Time-code Transfer and Selection of Exact Transcripts",
author = {Barras, C. and
Adda, G. and
Adda-Decker, M. and
Habert, B. and
de Mare{\"u}il, P. Boula and
Paroubek, P.},
editor = "Lino, Maria Teresa and
Xavier, Maria Francisca and
Ferreira, F{\'a}tima and
Costa, Rute and
Silva, Raquel",
booktitle = "Proceedings of the Fourth International Conference on Language Resources and Evaluation ({LREC}`04)",
month = may,
year = "2004",
address = "Lisbon, Portugal",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L04-1167/",
abstract = "The present study focuses on automatic processing of sibling resources of audio and written documents, such as available in audio archives or for parliament debates: written texts are close but not exact audio transcripts. Such resources deserve attention for several reasons: they represent an interesting testbed for studying differences between written and spoken material and they yield low cost resources for acoustic model training. When automatically transcribing the audio data, regions of agreement between automatic transcripts and written sources allow to transfer time-codes to the written documents: this may be helpful in an audio archive or audio information retrieval environment. Regions of disagreement can be automatically selected for further correction by human transcribers. This study makes use of 10 hours of French radio interview archives with corresponding press-oriented transcripts. The audio corpus has then been transcribed using the LIMSI speech recognizer resulting in automatic transcripts, exhibiting an average word error rate of 12{\%}. 80{\%} of the text corpus (with word chunks of at least five words) can be exactly aligned with the automatic transcripts of the audio data. The residual word error rate on these 80{\%} is less than 1{\%}."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="barras-etal-2004-automatic">
<titleInfo>
<title>Automatic Audio and Manual Transcripts Alignment, Time-code Transfer and Selection of Exact Transcripts</title>
</titleInfo>
<name type="personal">
<namePart type="given">C</namePart>
<namePart type="family">Barras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">G</namePart>
<namePart type="family">Adda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">M</namePart>
<namePart type="family">Adda-Decker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">B</namePart>
<namePart type="family">Habert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">P</namePart>
<namePart type="given">Boula</namePart>
<namePart type="family">de Mareüil</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">P</namePart>
<namePart type="family">Paroubek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2004-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC‘04)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Teresa</namePart>
<namePart type="family">Lino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Francisca</namePart>
<namePart type="family">Xavier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fátima</namePart>
<namePart type="family">Ferreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rute</namePart>
<namePart type="family">Costa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raquel</namePart>
<namePart type="family">Silva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Lisbon, Portugal</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The present study focuses on automatic processing of sibling resources of audio and written documents, such as available in audio archives or for parliament debates: written texts are close but not exact audio transcripts. Such resources deserve attention for several reasons: they represent an interesting testbed for studying differences between written and spoken material and they yield low cost resources for acoustic model training. When automatically transcribing the audio data, regions of agreement between automatic transcripts and written sources allow to transfer time-codes to the written documents: this may be helpful in an audio archive or audio information retrieval environment. Regions of disagreement can be automatically selected for further correction by human transcribers. This study makes use of 10 hours of French radio interview archives with corresponding press-oriented transcripts. The audio corpus has then been transcribed using the LIMSI speech recognizer resulting in automatic transcripts, exhibiting an average word error rate of 12%. 80% of the text corpus (with word chunks of at least five words) can be exactly aligned with the automatic transcripts of the audio data. The residual word error rate on these 80% is less than 1%.</abstract>
<identifier type="citekey">barras-etal-2004-automatic</identifier>
<location>
<url>https://aclanthology.org/L04-1167/</url>
</location>
<part>
<date>2004-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Audio and Manual Transcripts Alignment, Time-code Transfer and Selection of Exact Transcripts
%A Barras, C.
%A Adda, G.
%A Adda-Decker, M.
%A Habert, B.
%A de Mareüil, P. Boula
%A Paroubek, P.
%Y Lino, Maria Teresa
%Y Xavier, Maria Francisca
%Y Ferreira, Fátima
%Y Costa, Rute
%Y Silva, Raquel
%S Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC‘04)
%D 2004
%8 May
%I European Language Resources Association (ELRA)
%C Lisbon, Portugal
%F barras-etal-2004-automatic
%X The present study focuses on automatic processing of sibling resources of audio and written documents, such as available in audio archives or for parliament debates: written texts are close but not exact audio transcripts. Such resources deserve attention for several reasons: they represent an interesting testbed for studying differences between written and spoken material and they yield low cost resources for acoustic model training. When automatically transcribing the audio data, regions of agreement between automatic transcripts and written sources allow to transfer time-codes to the written documents: this may be helpful in an audio archive or audio information retrieval environment. Regions of disagreement can be automatically selected for further correction by human transcribers. This study makes use of 10 hours of French radio interview archives with corresponding press-oriented transcripts. The audio corpus has then been transcribed using the LIMSI speech recognizer resulting in automatic transcripts, exhibiting an average word error rate of 12%. 80% of the text corpus (with word chunks of at least five words) can be exactly aligned with the automatic transcripts of the audio data. The residual word error rate on these 80% is less than 1%.
%U https://aclanthology.org/L04-1167/
Markdown (Informal)
[Automatic Audio and Manual Transcripts Alignment, Time-code Transfer and Selection of Exact Transcripts](https://aclanthology.org/L04-1167/) (Barras et al., LREC 2004)
ACL