@inproceedings{maergner-etal-2011-unsupervised,
title = "Unsupervised vocabulary selection for simultaneous lecture translation",
author = "Maergner, Paul and
Kilgour, Kevin and
Lane, Ian and
Waibel, Alex",
editor = {Federico, Marcello and
Hwang, Mei-Yuh and
R{\"o}dder, Margit and
St{\"u}ker, Sebastian},
booktitle = "Proceedings of the 8th International Workshop on Spoken Language Translation: Papers",
month = dec # " 8-9",
year = "2011",
address = "San Francisco, California",
url = "https://aclanthology.org/2011.iwslt-papers.4",
pages = "214--221",
abstract = "In this work, we propose a novel method for vocabulary selection which enables simultaneous speech recognition systems for lectures to automatically adapt to the diverse topics that occur in educational and scientific lectures. Utilizing materials that are available before the lecture begins, such as lecture slides, our proposed framework iteratively searches for related documents on the World Wide Web and generates a lecture-specific vocabulary and language model based on the resulting documents. In this paper, we introduce a novel method for vocabulary selection where we rank vocabulary that occurs in the collected documents based on a relevance score which is calculated using a combination of word features. Vocabulary selection is a critical component for topic adaptation that has typically been overlooked in prior works. On the interACT German-English simultaneous lecture translation system our proposed approach significantly improved vocabulary coverage, reducing the out-of-vocabulary rate on average by 57.0{\%} and up to 84.9{\%}, compared to a lecture-independent baseline. Furthermore, our approach reduced the word error rate by up to 25.3{\%} (on average 13.2{\%} across all lectures), compared to a lectureindependent baseline.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="maergner-etal-2011-unsupervised">
<titleInfo>
<title>Unsupervised vocabulary selection for simultaneous lecture translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Maergner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Kilgour</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ian</namePart>
<namePart type="family">Lane</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Waibel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2011-dec 8-9</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th International Workshop on Spoken Language Translation: Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mei-Yuh</namePart>
<namePart type="family">Hwang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Margit</namePart>
<namePart type="family">Rödder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Stüker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<place>
<placeTerm type="text">San Francisco, California</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this work, we propose a novel method for vocabulary selection which enables simultaneous speech recognition systems for lectures to automatically adapt to the diverse topics that occur in educational and scientific lectures. Utilizing materials that are available before the lecture begins, such as lecture slides, our proposed framework iteratively searches for related documents on the World Wide Web and generates a lecture-specific vocabulary and language model based on the resulting documents. In this paper, we introduce a novel method for vocabulary selection where we rank vocabulary that occurs in the collected documents based on a relevance score which is calculated using a combination of word features. Vocabulary selection is a critical component for topic adaptation that has typically been overlooked in prior works. On the interACT German-English simultaneous lecture translation system our proposed approach significantly improved vocabulary coverage, reducing the out-of-vocabulary rate on average by 57.0% and up to 84.9%, compared to a lecture-independent baseline. Furthermore, our approach reduced the word error rate by up to 25.3% (on average 13.2% across all lectures), compared to a lectureindependent baseline.</abstract>
<identifier type="citekey">maergner-etal-2011-unsupervised</identifier>
<location>
<url>https://aclanthology.org/2011.iwslt-papers.4</url>
</location>
<part>
<date>2011-dec 8-9</date>
<extent unit="page">
<start>214</start>
<end>221</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Unsupervised vocabulary selection for simultaneous lecture translation
%A Maergner, Paul
%A Kilgour, Kevin
%A Lane, Ian
%A Waibel, Alex
%Y Federico, Marcello
%Y Hwang, Mei-Yuh
%Y Rödder, Margit
%Y Stüker, Sebastian
%S Proceedings of the 8th International Workshop on Spoken Language Translation: Papers
%D 2011
%8 dec 8 9
%C San Francisco, California
%F maergner-etal-2011-unsupervised
%X In this work, we propose a novel method for vocabulary selection which enables simultaneous speech recognition systems for lectures to automatically adapt to the diverse topics that occur in educational and scientific lectures. Utilizing materials that are available before the lecture begins, such as lecture slides, our proposed framework iteratively searches for related documents on the World Wide Web and generates a lecture-specific vocabulary and language model based on the resulting documents. In this paper, we introduce a novel method for vocabulary selection where we rank vocabulary that occurs in the collected documents based on a relevance score which is calculated using a combination of word features. Vocabulary selection is a critical component for topic adaptation that has typically been overlooked in prior works. On the interACT German-English simultaneous lecture translation system our proposed approach significantly improved vocabulary coverage, reducing the out-of-vocabulary rate on average by 57.0% and up to 84.9%, compared to a lecture-independent baseline. Furthermore, our approach reduced the word error rate by up to 25.3% (on average 13.2% across all lectures), compared to a lectureindependent baseline.
%U https://aclanthology.org/2011.iwslt-papers.4
%P 214-221
Markdown (Informal)
[Unsupervised vocabulary selection for simultaneous lecture translation](https://aclanthology.org/2011.iwslt-papers.4) (Maergner et al., IWSLT 2011)
ACL