@inproceedings{sennrich-volk-2010-mt,
title = "{MT}-based Sentence Alignment for {OCR}-generated Parallel Texts",
author = "Sennrich, Rico and
Volk, Martin",
booktitle = "Proceedings of the 9th Conference of the Association for Machine Translation in the Americas: Research Papers",
month = oct # " 31-" # nov # " 4",
year = "2010",
address = "Denver, Colorado, USA",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2010.amta-papers.14",
abstract = "The performance of current sentence alignment tools varies according to the to-be-aligned texts. We have found existing tools unsuitable for hard-to-align parallel texts and describe an alternative alignment algorithm. The basic idea is to use machine translations of a text and BLEU as a similarity score to find reliable alignments which are used as anchor points. The gaps between these anchor points are then filled using BLEU-based and length-based heuristics. We show that this approach outperforms state-of-the-art algorithms in our alignment task, and that this improvement in alignment quality translates into better SMT performance. Furthermore, we show that even length-based alignment algorithms profit from having a machine translation as a point of comparison.",
}
<?xml version="1.0" encoding="UTF-8"?>
<!--
  MODS v3 record for the paper with citekey sennrich-volk-2010-mt.
  Element-only containers are indented; text-bearing leaf elements are kept
  on a single line so that no indentation whitespace leaks into their values.
-->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="sennrich-volk-2010-mt">
    <titleInfo>
      <title>MT-based Sentence Alignment for OCR-generated Parallel Texts</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Rico</namePart>
      <namePart type="family">Sennrich</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Martin</namePart>
      <namePart type="family">Volk</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <!-- Date range 31 Oct to 4 Nov 2010, written as an encoded start/end
           pair instead of the unparseable string "2010-oct 31-nov 4". -->
      <dateIssued encoding="w3cdtf" point="start">2010-10-31</dateIssued>
      <dateIssued encoding="w3cdtf" point="end">2010-11-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume in which the paper appeared. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 9th Conference of the Association for Machine Translation in the Americas: Research Papers</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Machine Translation in the Americas</publisher>
        <place>
          <placeTerm type="text">Denver, Colorado, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The performance of current sentence alignment tools varies according to the to-be-aligned texts. We have found existing tools unsuitable for hard-to-align parallel texts and describe an alternative alignment algorithm. The basic idea is to use machine translations of a text and BLEU as a similarity score to find reliable alignments which are used as anchor points. The gaps between these anchor points are then filled using BLEU-based and length-based heuristics. We show that this approach outperforms state-of-the-art algorithms in our alignment task, and that this improvement in alignment quality translates into better SMT performance. Furthermore, we show that even length-based alignment algorithms profit from having a machine translation as a point of comparison.</abstract>
    <identifier type="citekey">sennrich-volk-2010-mt</identifier>
    <location>
      <url>https://aclanthology.org/2010.amta-papers.14</url>
    </location>
    <part>
      <!-- Same date range as originInfo/dateIssued, encoded the same way. -->
      <date encoding="w3cdtf" point="start">2010-10-31</date>
      <date encoding="w3cdtf" point="end">2010-11-04</date>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T MT-based Sentence Alignment for OCR-generated Parallel Texts
%A Sennrich, Rico
%A Volk, Martin
%S Proceedings of the 9th Conference of the Association for Machine Translation in the Americas: Research Papers
%D 2010
%8 oct 31-nov 4
%I Association for Machine Translation in the Americas
%C Denver, Colorado, USA
%F sennrich-volk-2010-mt
%X The performance of current sentence alignment tools varies according to the to-be-aligned texts. We have found existing tools unsuitable for hard-to-align parallel texts and describe an alternative alignment algorithm. The basic idea is to use machine translations of a text and BLEU as a similarity score to find reliable alignments which are used as anchor points. The gaps between these anchor points are then filled using BLEU-based and length-based heuristics. We show that this approach outperforms state-of-the-art algorithms in our alignment task, and that this improvement in alignment quality translates into better SMT performance. Furthermore, we show that even length-based alignment algorithms profit from having a machine translation as a point of comparison.
%U https://aclanthology.org/2010.amta-papers.14
Markdown (Informal)
[MT-based Sentence Alignment for OCR-generated Parallel Texts](https://aclanthology.org/2010.amta-papers.14) (Sennrich & Volk, AMTA 2010)
ACL