% Schwenk (IWSLT 2008): lightly-supervised training for statistical machine translation.
% `month` also carries the workshop's day range (20--21 October), joined to the `oct` macro with `#`.
@inproceedings{schwenk-2008-investigations,
    title = "Investigations on large-scale lightly-supervised training for statistical machine translation",
    author = "Schwenk, Holger",
    booktitle = "Proceedings of the 5th International Workshop on Spoken Language Translation: Papers",
    month = oct # " 20--21",
    year = "2008",
    address = "Waikiki, Hawaii",
    url = "https://aclanthology.org/2008.iwslt-papers.6",
    pages = "182--189",
    abstract = "Sentence-aligned bilingual texts are a crucial resource to build statistical machine translation (SMT) systems. In this paper we propose to apply lightly-supervised training to produce additional parallel data. The idea is to translate large amounts of monolingual data (up to 275M words) with an SMT system, and to use those as additional training data. Results are reported for the translation from French into English. We consider two setups: first the initial SMT system is only trained with a very limited amount of human-produced translations, and then the case where we have more than 100 million words. In both conditions, lightly-supervised training achieves significant improvements of the BLEU score.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="schwenk-2008-investigations">
<titleInfo>
<title>Investigations on large-scale lightly-supervised training for statistical machine translation.</title>
</titleInfo>
<name type="personal">
<namePart type="given">Holger</namePart>
<namePart type="family">Schwenk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2008-oct 20-21</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th International Workshop on Spoken Language Translation: Papers</title>
</titleInfo>
<originInfo>
<place>
<placeTerm type="text">Waikiki, Hawaii</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sentence-aligned bilingual texts are a crucial resource to build statistical machine translation (SMT) systems. In this paper we propose to apply lightly-supervised training to produce additional parallel data. The idea is to translate large amounts of monolingual data (up to 275M words) with an SMT system, and to use those as additional training data. Results are reported for the translation from French into English. We consider two setups: first the initial SMT system is only trained with a very limited amount of human-produced translations, and then the case where we have more than 100 million words. In both conditions, lightly-supervised training achieves significant improvements of the BLEU score.</abstract>
<identifier type="citekey">schwenk-2008-investigations</identifier>
<location>
<url>https://aclanthology.org/2008.iwslt-papers.6</url>
</location>
<part>
<date>2008-oct 20-21</date>
<extent unit="page">
<start>182</start>
<end>189</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Investigations on large-scale lightly-supervised training for statistical machine translation.
%A Schwenk, Holger
%S Proceedings of the 5th International Workshop on Spoken Language Translation: Papers
%D 2008
%8 oct 20 21
%C Waikiki, Hawaii
%F schwenk-2008-investigations
%X Sentence-aligned bilingual texts are a crucial resource to build statistical machine translation (SMT) systems. In this paper we propose to apply lightly-supervised training to produce additional parallel data. The idea is to translate large amounts of monolingual data (up to 275M words) with an SMT system, and to use those as additional training data. Results are reported for the translation from French into English. We consider two setups: first the initial SMT system is only trained with a very limited amount of human-produced translations, and then the case where we have more than 100 million words. In both conditions, lightly-supervised training achieves significant improvements of the BLEU score.
%U https://aclanthology.org/2008.iwslt-papers.6
%P 182-189
Markdown (Informal)
[Investigations on large-scale lightly-supervised training for statistical machine translation.](https://aclanthology.org/2008.iwslt-papers.6) (Schwenk, IWSLT 2008)
ACL