@inproceedings{el-kahlout-yvon-2010-pay,
title = "The pay-offs of preprocessing for {G}erman-{E}nglish statistical machine translation",
author = "El-Kahlout, Ilknur Durgar and
Yvon, Francois",
booktitle = "Proceedings of the 7th International Workshop on Spoken Language Translation: Papers",
month = dec # " 2-3",
year = "2010",
address = "Paris, France",
url = "https://aclanthology.org/2010.iwslt-papers.6",
pages = "251--258",
abstract = "In this paper, we present the result of our work on improving the preprocessing for German-English statistical machine translation. We implemented and tested various improvements aimed at i) converting German texts to the new orthographic conventions; ii) performing a new tokenization for German; iii) normalizing lexical redundancy with the help of POS tagging and morphological analysis; iv) splitting German compound words with frequency based algorithm and; v) reducing singletons and out-of-vocabulary words. All these steps are performed during preprocessing on the German side. Combining all these processes, we reduced 10{\%} of the singletons, 2{\%} OOV words, and obtained 1.5 absolute (7{\%} relative) BLEU improvement on the WMT 2010 German to English News translation task.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="el-kahlout-yvon-2010-pay">
<titleInfo>
<title>The pay-offs of preprocessing for German-English statistical machine translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ilknur</namePart>
<namePart type="given">Durgar</namePart>
<namePart type="family">El-Kahlout</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francois</namePart>
<namePart type="family">Yvon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-dec 2-3</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 7th International Workshop on Spoken Language Translation: Papers</title>
</titleInfo>
<originInfo>
<place>
<placeTerm type="text">Paris, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we present the result of our work on improving the preprocessing for German-English statistical machine translation. We implemented and tested various improvements aimed at i) converting German texts to the new orthographic conventions; ii) performing a new tokenization for German; iii) normalizing lexical redundancy with the help of POS tagging and morphological analysis; iv) splitting German compound words with frequency based algorithm and; v) reducing singletons and out-of-vocabulary words. All these steps are performed during preprocessing on the German side. Combining all these processes, we reduced 10% of the singletons, 2% OOV words, and obtained 1.5 absolute (7% relative) BLEU improvement on the WMT 2010 German to English News translation task.</abstract>
<identifier type="citekey">el-kahlout-yvon-2010-pay</identifier>
<location>
<url>https://aclanthology.org/2010.iwslt-papers.6</url>
</location>
<part>
<date>2010-dec 2-3</date>
<extent unit="page">
<start>251</start>
<end>258</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The pay-offs of preprocessing for German-English statistical machine translation
%A El-Kahlout, Ilknur Durgar
%A Yvon, Francois
%S Proceedings of the 7th International Workshop on Spoken Language Translation: Papers
%D 2010
%8 dec 2-3
%C Paris, France
%F el-kahlout-yvon-2010-pay
%X In this paper, we present the result of our work on improving the preprocessing for German-English statistical machine translation. We implemented and tested various improvements aimed at i) converting German texts to the new orthographic conventions; ii) performing a new tokenization for German; iii) normalizing lexical redundancy with the help of POS tagging and morphological analysis; iv) splitting German compound words with frequency based algorithm and; v) reducing singletons and out-of-vocabulary words. All these steps are performed during preprocessing on the German side. Combining all these processes, we reduced 10% of the singletons, 2% OOV words, and obtained 1.5 absolute (7% relative) BLEU improvement on the WMT 2010 German to English News translation task.
%U https://aclanthology.org/2010.iwslt-papers.6
%P 251-258
Markdown (Informal)
[The pay-offs of preprocessing for German-English statistical machine translation](https://aclanthology.org/2010.iwslt-papers.6) (El-Kahlout & Yvon, IWSLT 2010)
ACL