@inproceedings{larkin-etal-2021-like,
title = "Like Chalk and Cheese? On the Effects of Translationese in {MT} Training",
author = "Larkin, Samuel and
Simard, Michel and
Knowles, Rebecca",
editor = "Duh, Kevin and
Guzm{\'a}n, Francisco",
booktitle = "Proceedings of Machine Translation Summit XVIII: Research Track",
month = aug,
year = "2021",
address = "Virtual",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2021.mtsummit-research.9",
pages = "103--113",
abstract = "We revisit the topic of translation direction in the data used for training neural machine translation systems and focusing on a real-world scenario with known translation direction and imbalances in translation direction: the Canadian Hansard. According to automatic metrics and we observe that using parallel data that was produced in the {``}matching{''} translation direction (Authentic source and translationese target) improves translation quality. In cases of data imbalance in terms of translation direction and we find that tagging of translation direction can close the performance gap. We perform a human evaluation that differs slightly from the automatic metrics and but nevertheless confirms that for this French-English dataset that is known to contain high-quality translations and authentic or tagged mixed source improves over translationese source for training.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="larkin-etal-2021-like">
<titleInfo>
<title>Like Chalk and Cheese? On the Effects of Translationese in MT Training</title>
</titleInfo>
<name type="personal">
<namePart type="given">Samuel</namePart>
<namePart type="family">Larkin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michel</namePart>
<namePart type="family">Simard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rebecca</namePart>
<namePart type="family">Knowles</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Machine Translation Summit XVIII: Research Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francisco</namePart>
<namePart type="family">Guzmán</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">Virtual</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We revisit the topic of translation direction in the data used for training neural machine translation systems and focusing on a real-world scenario with known translation direction and imbalances in translation direction: the Canadian Hansard. According to automatic metrics and we observe that using parallel data that was produced in the “matching” translation direction (Authentic source and translationese target) improves translation quality. In cases of data imbalance in terms of translation direction and we find that tagging of translation direction can close the performance gap. We perform a human evaluation that differs slightly from the automatic metrics and but nevertheless confirms that for this French-English dataset that is known to contain high-quality translations and authentic or tagged mixed source improves over translationese source for training.</abstract>
<identifier type="citekey">larkin-etal-2021-like</identifier>
<location>
<url>https://aclanthology.org/2021.mtsummit-research.9</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>103</start>
<end>113</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Like Chalk and Cheese? On the Effects of Translationese in MT Training
%A Larkin, Samuel
%A Simard, Michel
%A Knowles, Rebecca
%Y Duh, Kevin
%Y Guzmán, Francisco
%S Proceedings of Machine Translation Summit XVIII: Research Track
%D 2021
%8 August
%I Association for Machine Translation in the Americas
%C Virtual
%F larkin-etal-2021-like
%X We revisit the topic of translation direction in the data used for training neural machine translation systems and focusing on a real-world scenario with known translation direction and imbalances in translation direction: the Canadian Hansard. According to automatic metrics and we observe that using parallel data that was produced in the “matching” translation direction (Authentic source and translationese target) improves translation quality. In cases of data imbalance in terms of translation direction and we find that tagging of translation direction can close the performance gap. We perform a human evaluation that differs slightly from the automatic metrics and but nevertheless confirms that for this French-English dataset that is known to contain high-quality translations and authentic or tagged mixed source improves over translationese source for training.
%U https://aclanthology.org/2021.mtsummit-research.9
%P 103-113
Markdown (Informal)
[Like Chalk and Cheese? On the Effects of Translationese in MT Training](https://aclanthology.org/2021.mtsummit-research.9) (Larkin et al., MTSummit 2021)
ACL