% Conference paper from the AMTA 2008 research-papers proceedings.
% The month field appends the day range to the month macro (btxdoc convention);
% the range is written with a tie and "--" so styles typeset "21-25" unbroken
% and with an en dash, matching the pages field.
@inproceedings{deneefe-etal-2008-overcoming,
    title     = "Overcoming Vocabulary Sparsity in {MT} Using Lattices",
    author    = "DeNeefe, Steve and
                 Hermjakob, Ulf and
                 Knight, Kevin",
    booktitle = "Proceedings of the 8th Conference of the Association for Machine Translation in the Americas: Research Papers",
    month     = oct # "~21--25",
    year      = "2008",
    address   = "Waikiki, USA",
    publisher = "Association for Machine Translation in the Americas",
    url       = "https://aclanthology.org/2008.amta-papers.7",
    pages     = "89--96",
    abstract  = "Source languages with complex word-formation rules present a challenge for statistical machine translation (SMT). In this paper, we take on three facets of this challenge: (1) common stems are fragmented into many different forms in training data, (2) rare and unknown words are frequent in test data, and (3) spelling variation creates additional sparseness problems. We present a novel, lightweight technique for dealing with this fragmentation, based on bilingual data, and we also present a combination of linguistic and statistical techniques for dealing with rare and unknown words. Taking these techniques together, we demonstrate +1.3 and +1.6 BLEU increases on top of strong baselines for Arabic-English machine translation.",
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the conference paper "Overcoming Vocabulary Sparsity in MT Using Lattices". -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="deneefe-etal-2008-overcoming">
    <titleInfo>
      <title>Overcoming Vocabulary Sparsity in MT Using Lattices</title>
    </titleInfo>
    <!-- Authors, in citation order. -->
    <name type="personal">
      <namePart type="given">Steve</namePart>
      <namePart type="family">DeNeefe</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ulf</namePart>
      <namePart type="family">Hermjakob</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kevin</namePart>
      <namePart type="family">Knight</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <!-- October 21-25, 2008 as a machine-readable W3CDTF start/end pair
           (previously the unparseable free-text value "2008-oct 21-25"). -->
      <dateIssued encoding="w3cdtf" point="start">2008-10-21</dateIssued>
      <dateIssued encoding="w3cdtf" point="end">2008-10-25</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 8th Conference of the Association for Machine Translation in the Americas: Research Papers</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Machine Translation in the Americas</publisher>
        <place>
          <placeTerm type="text">Waikiki, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Source languages with complex word-formation rules present a challenge for statistical machine translation (SMT). In this paper, we take on three facets of this challenge: (1) common stems are fragmented into many different forms in training data, (2) rare and unknown words are frequent in test data, and (3) spelling variation creates additional sparseness problems. We present a novel, lightweight technique for dealing with this fragmentation, based on bilingual data, and we also present a combination of linguistic and statistical techniques for dealing with rare and unknown words. Taking these techniques together, we demonstrate +1.3 and +1.6 BLEU increases on top of strong baselines for Arabic-English machine translation.</abstract>
    <identifier type="citekey">deneefe-etal-2008-overcoming</identifier>
    <location>
      <url>https://aclanthology.org/2008.amta-papers.7</url>
    </location>
    <part>
      <!-- Same date range as originInfo/dateIssued, in the same encoding. -->
      <date encoding="w3cdtf" point="start">2008-10-21</date>
      <date encoding="w3cdtf" point="end">2008-10-25</date>
      <extent unit="page">
        <start>89</start>
        <end>96</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Overcoming Vocabulary Sparsity in MT Using Lattices
%A DeNeefe, Steve
%A Hermjakob, Ulf
%A Knight, Kevin
%S Proceedings of the 8th Conference of the Association for Machine Translation in the Americas: Research Papers
%D 2008
%8 oct 21-25
%I Association for Machine Translation in the Americas
%C Waikiki, USA
%F deneefe-etal-2008-overcoming
%X Source languages with complex word-formation rules present a challenge for statistical machine translation (SMT). In this paper, we take on three facets of this challenge: (1) common stems are fragmented into many different forms in training data, (2) rare and unknown words are frequent in test data, and (3) spelling variation creates additional sparseness problems. We present a novel, lightweight technique for dealing with this fragmentation, based on bilingual data, and we also present a combination of linguistic and statistical techniques for dealing with rare and unknown words. Taking these techniques together, we demonstrate +1.3 and +1.6 BLEU increases on top of strong baselines for Arabic-English machine translation.
%U https://aclanthology.org/2008.amta-papers.7
%P 89-96
Markdown (Informal)
[Overcoming Vocabulary Sparsity in MT Using Lattices](https://aclanthology.org/2008.amta-papers.7) (DeNeefe et al., AMTA 2008)
ACL
- Steve DeNeefe, Ulf Hermjakob, and Kevin Knight. 2008. Overcoming Vocabulary Sparsity in MT Using Lattices. In Proceedings of the 8th Conference of the Association for Machine Translation in the Americas: Research Papers, pages 89–96, Waikiki, USA. Association for Machine Translation in the Americas.