@inproceedings{lecorve-etal-2008-use,
title = "On the Use of Web Resources and Natural Language Processing Techniques to Improve Automatic Speech Recognition Systems",
author = "Lecorv{\'e}, Gw{\'e}nol{\'e} and
Gravier, Guillaume and
S{\'e}billot, Pascale",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Tapias, Daniel",
booktitle = "Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)",
month = may,
year = "2008",
address = "Marrakech, Morocco",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2008/pdf/155_paper.pdf",
abstract = "Language models used in current automatic speech recognition systems are trained on general-purpose corpora and are therefore not relevant to transcribe spoken documents dealing with successive precise topics, such as long multimedia streams, frequently tacking reportages and debates. To overcome this problem, this paper shows that Web resources and natural language processing techniques can be effective to automatically adapt the baseline language model of an automatic speech recognition system to any encountered topic. More precisely, we detail how to characterize the topic of transcription segment and how to collect Web pages from which a topic-specific language model can be trained. Then, an adapted language model is obtained by combining the topic-specific language model with the general-purpose language model. Finally, new transcriptions are generated using the adapted language model and are compared with transcriptions previously obtained with the baseline language model. Experiments show that our topic adaptation technique leads to significant transcription quality gains.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lecorve-etal-2008-use">
<titleInfo>
<title>On the Use of Web Resources and Natural Language Processing Techniques to Improve Automatic Speech Recognition Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gwénolé</namePart>
<namePart type="family">Lecorvé</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Guillaume</namePart>
<namePart type="family">Gravier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pascale</namePart>
<namePart type="family">Sébillot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2008-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marrakech, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Language models used in current automatic speech recognition systems are trained on general-purpose corpora and are therefore not relevant to transcribe spoken documents dealing with successive precise topics, such as long multimedia streams, frequently tacking reportages and debates. To overcome this problem, this paper shows that Web resources and natural language processing techniques can be effective to automatically adapt the baseline language model of an automatic speech recognition system to any encountered topic. More precisely, we detail how to characterize the topic of transcription segment and how to collect Web pages from which a topic-specific language model can be trained. Then, an adapted language model is obtained by combining the topic-specific language model with the general-purpose language model. Finally, new transcriptions are generated using the adapted language model and are compared with transcriptions previously obtained with the baseline language model. Experiments show that our topic adaptation technique leads to significant transcription quality gains.</abstract>
<identifier type="citekey">lecorve-etal-2008-use</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/155_paper.pdf</url>
</location>
<part>
<date>2008-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On the Use of Web Resources and Natural Language Processing Techniques to Improve Automatic Speech Recognition Systems
%A Lecorvé, Gwénolé
%A Gravier, Guillaume
%A Sébillot, Pascale
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Tapias, Daniel
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)
%D 2008
%8 May
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F lecorve-etal-2008-use
%X Language models used in current automatic speech recognition systems are trained on general-purpose corpora and are therefore not relevant to transcribe spoken documents dealing with successive precise topics, such as long multimedia streams, frequently tacking reportages and debates. To overcome this problem, this paper shows that Web resources and natural language processing techniques can be effective to automatically adapt the baseline language model of an automatic speech recognition system to any encountered topic. More precisely, we detail how to characterize the topic of transcription segment and how to collect Web pages from which a topic-specific language model can be trained. Then, an adapted language model is obtained by combining the topic-specific language model with the general-purpose language model. Finally, new transcriptions are generated using the adapted language model and are compared with transcriptions previously obtained with the baseline language model. Experiments show that our topic adaptation technique leads to significant transcription quality gains.
%U http://www.lrec-conf.org/proceedings/lrec2008/pdf/155_paper.pdf
Markdown (Informal)
[On the Use of Web Resources and Natural Language Processing Techniques to Improve Automatic Speech Recognition Systems](http://www.lrec-conf.org/proceedings/lrec2008/pdf/155_paper.pdf) (Lecorvé et al., LREC 2008)
ACL