@inproceedings{rayner-etal-2012-using,
title = "Using Source-Language Transformations to Address Register Mismatches in {SMT}",
author = "Rayner, Manny and
Bouillon, Pierrette and
Haddow, Barry",
booktitle = "Proceedings of the 10th Conference of the Association for Machine Translation in the Americas: Research Papers",
month = oct # " 28-" # nov # " 1",
year = "2012",
address = "San Diego, California, USA",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2012.amta-papers.25",
abstract = "Mismatches between training and test data are a ubiquitous problem for real SMT applications. In this paper, we examine a type of mismatch that commonly arises when translating from French and similar languages: available training data is mostly formal register, but test data may well be informal register. We consider methods for defining surface transformations that map common informal language constructions into their formal language counterparts, or vice versa; we then describe two ways to use these mappings, either to create artificial training data or to pre-process source text at run-time. An initial evaluation performed using crowd-sourced comparisons of alternate translations produced by a French-to-English SMT system suggests that both methods can improve performance, with run-time pre-processing being the more effective of the two.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rayner-etal-2012-using">
<titleInfo>
<title>Using Source-Language Transformations to Address Register Mismatches in SMT</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manny</namePart>
<namePart type="family">Rayner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pierrette</namePart>
<namePart type="family">Bouillon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barry</namePart>
<namePart type="family">Haddow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-oct 28-nov 1</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th Conference of the Association for Machine Translation in the Americas: Research Papers</title>
</titleInfo>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Mismatches between training and test data are a ubiquitous problem for real SMT applications. In this paper, we examine a type of mismatch that commonly arises when translating from French and similar languages: available training data is mostly formal register, but test data may well be informal register. We consider methods for defining surface transformations that map common informal language constructions into their formal language counterparts, or vice versa; we then describe two ways to use these mappings, either to create artificial training data or to pre-process source text at run-time. An initial evaluation performed using crowd-sourced comparisons of alternate translations produced by a French-to-English SMT system suggests that both methods can improve performance, with run-time pre-processing being the more effective of the two.</abstract>
<identifier type="citekey">rayner-etal-2012-using</identifier>
<location>
<url>https://aclanthology.org/2012.amta-papers.25</url>
</location>
<part>
<date>2012-oct 28-nov 1</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Using Source-Language Transformations to Address Register Mismatches in SMT
%A Rayner, Manny
%A Bouillon, Pierrette
%A Haddow, Barry
%S Proceedings of the 10th Conference of the Association for Machine Translation in the Americas: Research Papers
%D 2012
%8 oct 28 nov 1
%I Association for Machine Translation in the Americas
%C San Diego, California, USA
%F rayner-etal-2012-using
%X Mismatches between training and test data are a ubiquitous problem for real SMT applications. In this paper, we examine a type of mismatch that commonly arises when translating from French and similar languages: available training data is mostly formal register, but test data may well be informal register. We consider methods for defining surface transformations that map common informal language constructions into their formal language counterparts, or vice versa; we then describe two ways to use these mappings, either to create artificial training data or to pre-process source text at run-time. An initial evaluation performed using crowd-sourced comparisons of alternate translations produced by a French-to-English SMT system suggests that both methods can improve performance, with run-time pre-processing being the more effective of the two.
%U https://aclanthology.org/2012.amta-papers.25
Markdown (Informal)
[Using Source-Language Transformations to Address Register Mismatches in SMT](https://aclanthology.org/2012.amta-papers.25) (Rayner et al., AMTA 2012)
ACL