% Conference paper record. The citation key is kept unchanged so that existing citations keep resolving.
@inproceedings{gretter-2012-focusing,
title = "Focusing language models for automatic speech recognition",
author = "Falavigna, Daniele and Gretter, Roberto",
booktitle = "Proceedings of the 9th International Workshop on Spoken Language Translation: Papers",
month = dec # " 6-7",
year = "2012",
address = "Hong Kong",
url = "https://aclanthology.org/2012.iwslt-papers.4",
pages = "171--178",
abstract = "This paper describes a method for selecting text data from a corpus with the aim of training auxiliary Language Models (LMs) for an Automatic Speech Recognition (ASR) system. A novel similarity score function is proposed, which allows to score each document belonging to the corpus in order to select those with the highest scores for training auxiliary LMs which are linearly interpolated with the baseline one. The similarity score function makes use of {``}similarity models{''} built from the automatic transcriptions furnished by earlier stages of the ASR system, while the documents selected for training auxiliary LMs are drawn from the same set of data used to train the baseline LM used in the ASR system. In this way, the resulting interpolated LMs are {``}focused{''} towards the output of the recognizer itself. The approach allows to improve word error rate, measured on a task of spontaneous speech, of about 3{\%} relative. It is important to note that a similar improvement has been obtained using an {``}in-domain{''} set of texts data not contained in the sources used to train the baseline LM. In addition, we compared the proposed similarity score function with two other ones based on perplexity (PP) and on TFxIDF (Term Frequency x Inverse Document Frequency) vector space model. The proposed approach provides about the same performance as that based on TFxIDF model but requires both lower computation and occupation memory.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<!-- MODS record for one conference paper; each author has its own <name> element. -->
<mods ID="gretter-2012-focusing">
<titleInfo>
<title>Focusing language models for automatic speech recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daniele</namePart>
<namePart type="family">Falavigna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roberto</namePart>
<namePart type="family">Gretter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-dec 6-7</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 9th International Workshop on Spoken Language Translation: Papers</title>
</titleInfo>
<originInfo>
<place>
<placeTerm type="text">Hong Kong</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes a method for selecting text data from a corpus with the aim of training auxiliary Language Models (LMs) for an Automatic Speech Recognition (ASR) system. A novel similarity score function is proposed, which allows to score each document belonging to the corpus in order to select those with the highest scores for training auxiliary LMs which are linearly interpolated with the baseline one. The similarity score function makes use of “similarity models” built from the automatic transcriptions furnished by earlier stages of the ASR system, while the documents selected for training auxiliary LMs are drawn from the same set of data used to train the baseline LM used in the ASR system. In this way, the resulting interpolated LMs are “focused” towards the output of the recognizer itself. The approach allows to improve word error rate, measured on a task of spontaneous speech, of about 3% relative. It is important to note that a similar improvement has been obtained using an “in-domain” set of texts data not contained in the sources used to train the baseline LM. In addition, we compared the proposed similarity score function with two other ones based on perplexity (PP) and on TFxIDF (Term Frequency x Inverse Document Frequency) vector space model. The proposed approach provides about the same performance as that based on TFxIDF model but requires both lower computation and occupation memory.</abstract>
<identifier type="citekey">gretter-2012-focusing</identifier>
<location>
<url>https://aclanthology.org/2012.iwslt-papers.4</url>
</location>
<part>
<date>2012-dec 6-7</date>
<extent unit="page">
<start>171</start>
<end>178</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Focusing language models for automatic speech recognition
%A Falavigna, Daniele
%A Gretter, Roberto
%S Proceedings of the 9th International Workshop on Spoken Language Translation: Papers
%D 2012
%8 dec 6-7
%C Hong Kong
%F gretter-2012-focusing
%X This paper describes a method for selecting text data from a corpus with the aim of training auxiliary Language Models (LMs) for an Automatic Speech Recognition (ASR) system. A novel similarity score function is proposed, which allows to score each document belonging to the corpus in order to select those with the highest scores for training auxiliary LMs which are linearly interpolated with the baseline one. The similarity score function makes use of “similarity models” built from the automatic transcriptions furnished by earlier stages of the ASR system, while the documents selected for training auxiliary LMs are drawn from the same set of data used to train the baseline LM used in the ASR system. In this way, the resulting interpolated LMs are “focused” towards the output of the recognizer itself. The approach allows to improve word error rate, measured on a task of spontaneous speech, of about 3% relative. It is important to note that a similar improvement has been obtained using an “in-domain” set of texts data not contained in the sources used to train the baseline LM. In addition, we compared the proposed similarity score function with two other ones based on perplexity (PP) and on TFxIDF (Term Frequency x Inverse Document Frequency) vector space model. The proposed approach provides about the same performance as that based on TFxIDF model but requires both lower computation and occupation memory.
%U https://aclanthology.org/2012.iwslt-papers.4
%P 171-178
Markdown (Informal)
[Focusing language models for automatic speech recognition](https://aclanthology.org/2012.iwslt-papers.4) (Falavigna & Gretter, IWSLT 2012)
ACL