@inproceedings{huck-etal-2011-lexicon,
title = "Lexicon models for hierarchical phrase-based machine translation",
author = "Huck, Matthias and
Mansour, Saab and
Wiesler, Simon and
Ney, Hermann",
editor = {Federico, Marcello and
Hwang, Mei-Yuh and
R{\"o}dder, Margit and
St{\"u}ker, Sebastian},
booktitle = "Proceedings of the 8th International Workshop on Spoken Language Translation: Papers",
month = dec # " 8-9",
year = "2011",
address = "San Francisco, California",
url = "https://aclanthology.org/2011.iwslt-papers.1",
pages = "191--198",
abstract = "In this paper, we investigate lexicon models for hierarchical phrase-based statistical machine translation. We study five types of lexicon models: a model which is extracted from word-aligned training data and{---}given the word alignment matrix{---}relies on pure relative frequencies [1]; the IBM model 1 lexicon [2]; a regularized version of IBM model 1; a triplet lexicon model variant [3]; and a discriminatively trained word lexicon model [4]. We explore source-to-target models with phrase-level as well as sentence-level scoring and target-to-source models with scoring on phrase level only. For the first two types of lexicon models, we compare several scoring variants. All models are used during search, i.e. they are incorporated directly into the log-linear model combination of the decoder. Phrase table smoothing with triplet lexicon models and with discriminative word lexicons are novel contributions. We also propose a new regularization technique for IBM model 1 by means of the Kullback-Leibler divergence with the empirical unigram distribution as regularization term. Experiments are carried out on the large-scale NIST Chinese→English translation task and on the English→French and Arabic→English IWSLT TED tasks. For Chinese→English and English→French, we obtain the best results by using the discriminative word lexicon to smooth our phrase tables.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="huck-etal-2011-lexicon">
<titleInfo>
<title>Lexicon models for hierarchical phrase-based machine translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Matthias</namePart>
<namePart type="family">Huck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saab</namePart>
<namePart type="family">Mansour</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Wiesler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hermann</namePart>
<namePart type="family">Ney</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2011-dec 8-9</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th International Workshop on Spoken Language Translation: Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mei-Yuh</namePart>
<namePart type="family">Hwang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Margit</namePart>
<namePart type="family">Rödder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Stüker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<place>
<placeTerm type="text">San Francisco, California</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we investigate lexicon models for hierarchical phrase-based statistical machine translation. We study five types of lexicon models: a model which is extracted from word-aligned training data and—given the word alignment matrix—relies on pure relative frequencies [1]; the IBM model 1 lexicon [2]; a regularized version of IBM model 1; a triplet lexicon model variant [3]; and a discriminatively trained word lexicon model [4]. We explore source-to-target models with phrase-level as well as sentence-level scoring and target-to-source models with scoring on phrase level only. For the first two types of lexicon models, we compare several scoring variants. All models are used during search, i.e. they are incorporated directly into the log-linear model combination of the decoder. Phrase table smoothing with triplet lexicon models and with discriminative word lexicons are novel contributions. We also propose a new regularization technique for IBM model 1 by means of the Kullback-Leibler divergence with the empirical unigram distribution as regularization term. Experiments are carried out on the large-scale NIST Chinese→English translation task and on the English→French and Arabic→English IWSLT TED tasks. For Chinese→English and English→French, we obtain the best results by using the discriminative word lexicon to smooth our phrase tables.</abstract>
<identifier type="citekey">huck-etal-2011-lexicon</identifier>
<location>
<url>https://aclanthology.org/2011.iwslt-papers.1</url>
</location>
<part>
<date>2011-dec 8-9</date>
<extent unit="page">
<start>191</start>
<end>198</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Lexicon models for hierarchical phrase-based machine translation
%A Huck, Matthias
%A Mansour, Saab
%A Wiesler, Simon
%A Ney, Hermann
%Y Federico, Marcello
%Y Hwang, Mei-Yuh
%Y Rödder, Margit
%Y Stüker, Sebastian
%S Proceedings of the 8th International Workshop on Spoken Language Translation: Papers
%D 2011
%8 dec 8 9
%C San Francisco, California
%F huck-etal-2011-lexicon
%X In this paper, we investigate lexicon models for hierarchical phrase-based statistical machine translation. We study five types of lexicon models: a model which is extracted from word-aligned training data and—given the word alignment matrix—relies on pure relative frequencies [1]; the IBM model 1 lexicon [2]; a regularized version of IBM model 1; a triplet lexicon model variant [3]; and a discriminatively trained word lexicon model [4]. We explore source-to-target models with phrase-level as well as sentence-level scoring and target-to-source models with scoring on phrase level only. For the first two types of lexicon models, we compare several scoring variants. All models are used during search, i.e. they are incorporated directly into the log-linear model combination of the decoder. Phrase table smoothing with triplet lexicon models and with discriminative word lexicons are novel contributions. We also propose a new regularization technique for IBM model 1 by means of the Kullback-Leibler divergence with the empirical unigram distribution as regularization term. Experiments are carried out on the large-scale NIST Chinese→English translation task and on the English→French and Arabic→English IWSLT TED tasks. For Chinese→English and English→French, we obtain the best results by using the discriminative word lexicon to smooth our phrase tables.
%U https://aclanthology.org/2011.iwslt-papers.1
%P 191-198
Markdown (Informal)
[Lexicon models for hierarchical phrase-based machine translation](https://aclanthology.org/2011.iwslt-papers.1) (Huck et al., IWSLT 2011)
ACL