@inproceedings{cettolo-etal-2010-mining,
title = "Mining parallel fragments from comparable texts",
author = "Cettolo, Mauro and
Federico, Marcello and
Bertoldi, Nicola",
booktitle = "Proceedings of the 7th International Workshop on Spoken Language Translation: Papers",
month = dec # " 2-3",
year = "2010",
address = "Paris, France",
url = "https://aclanthology.org/2010.iwslt-papers.3",
pages = "227--234",
abstract = "This paper proposes a novel method for exploiting comparable documents to generate parallel data for machine translation. First, each source document is paired to each sentence of the corresponding target document; second, partial phrase alignments are computed within the paired texts; finally, fragment pairs across linked phrase-pairs are extracted. The algorithm has been tested on two recent challenging news translation tasks. Results show that mining for parallel fragments is more effective than mining for parallel sentences, and that comparable in-domain texts can be more valuable than parallel out-of-domain texts.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cettolo-etal-2010-mining">
<titleInfo>
<title>Mining parallel fragments from comparable texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mauro</namePart>
<namePart type="family">Cettolo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicola</namePart>
<namePart type="family">Bertoldi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued encoding="w3cdtf" point="start">2010-12-02</dateIssued>
<dateIssued encoding="w3cdtf" point="end">2010-12-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 7th International Workshop on Spoken Language Translation: Papers</title>
</titleInfo>
<originInfo>
<place>
<placeTerm type="text">Paris, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper proposes a novel method for exploiting comparable documents to generate parallel data for machine translation. First, each source document is paired to each sentence of the corresponding target document; second, partial phrase alignments are computed within the paired texts; finally, fragment pairs across linked phrase-pairs are extracted. The algorithm has been tested on two recent challenging news translation tasks. Results show that mining for parallel fragments is more effective than mining for parallel sentences, and that comparable in-domain texts can be more valuable than parallel out-of-domain texts.</abstract>
<identifier type="citekey">cettolo-etal-2010-mining</identifier>
<location>
<url>https://aclanthology.org/2010.iwslt-papers.3</url>
</location>
<part>
<date encoding="w3cdtf" point="start">2010-12-02</date>
<date encoding="w3cdtf" point="end">2010-12-03</date>
<extent unit="page">
<start>227</start>
<end>234</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Mining parallel fragments from comparable texts
%A Cettolo, Mauro
%A Federico, Marcello
%A Bertoldi, Nicola
%S Proceedings of the 7th International Workshop on Spoken Language Translation: Papers
%D 2010
%8 December 2-3
%C Paris, France
%F cettolo-etal-2010-mining
%X This paper proposes a novel method for exploiting comparable documents to generate parallel data for machine translation. First, each source document is paired to each sentence of the corresponding target document; second, partial phrase alignments are computed within the paired texts; finally, fragment pairs across linked phrase-pairs are extracted. The algorithm has been tested on two recent challenging news translation tasks. Results show that mining for parallel fragments is more effective than mining for parallel sentences, and that comparable in-domain texts can be more valuable than parallel out-of-domain texts.
%U https://aclanthology.org/2010.iwslt-papers.3
%P 227-234
Markdown (Informal)
[Mining parallel fragments from comparable texts](https://aclanthology.org/2010.iwslt-papers.3) (Cettolo et al., IWSLT 2010)
ACL