% Conference paper in the EACL 2021 main-volume proceedings (BibTeX record).
@inproceedings{vanmassenhove-etal-2021-machine,
    title = "Machine Translationese: Effects of Algorithmic Bias on Linguistic Complexity in Machine Translation",
    author = "Vanmassenhove, Eva and
      Shterionov, Dimitar and
      Gwilliam, Matthew",
    editor = {Merlo, Paola and
      Tiedemann, J{\"o}rg and
      Tsarfaty, Reut},
    booktitle = "Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume",
    month = apr,
    year = "2021",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.eacl-main.188",
    doi = "10.18653/v1/2021.eacl-main.188",
    pages = "2203--2213",
    abstract = "Recent studies in the field of Machine Translation (MT) and Natural Language Processing (NLP) have shown that existing models amplify biases observed in the training data. The amplification of biases in language technology has mainly been examined with respect to specific phenomena, such as gender bias. In this work, we go beyond the study of gender in MT and investigate how bias amplification might affect language in a broader sense. We hypothesize that the {`}algorithmic bias{'}, i.e. an exacerbation of frequently observed patterns in combination with a loss of less frequent ones, not only exacerbates societal biases present in current datasets but could also lead to an artificially impoverished language: {`}machine translationese{'}. We assess the linguistic richness (on a lexical and morphological level) of translations created by different data-driven MT paradigms {--} phrase-based statistical (PB-SMT) and neural MT (NMT). Our experiments show that there is a loss of lexical and syntactic richness in the translations produced by all investigated MT paradigms for two language pairs (EN-FR and EN-ES).",
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single bibliographic record for a conference paper. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="vanmassenhove-etal-2021-machine">
    <titleInfo>
      <title>Machine Translationese: Effects of Algorithmic Bias on Linguistic Complexity in Machine Translation</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Eva</namePart>
      <namePart type="family">Vanmassenhove</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dimitar</namePart>
      <namePart type="family">Shterionov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Matthew</namePart>
      <namePart type="family">Gwilliam</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Paola</namePart>
        <namePart type="family">Merlo</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jörg</namePart>
        <namePart type="family">Tiedemann</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Reut</namePart>
        <namePart type="family">Tsarfaty</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Recent studies in the field of Machine Translation (MT) and Natural Language Processing (NLP) have shown that existing models amplify biases observed in the training data. The amplification of biases in language technology has mainly been examined with respect to specific phenomena, such as gender bias. In this work, we go beyond the study of gender in MT and investigate how bias amplification might affect language in a broader sense. We hypothesize that the ‘algorithmic bias’, i.e. an exacerbation of frequently observed patterns in combination with a loss of less frequent ones, not only exacerbates societal biases present in current datasets but could also lead to an artificially impoverished language: ‘machine translationese’. We assess the linguistic richness (on a lexical and morphological level) of translations created by different data-driven MT paradigms – phrase-based statistical (PB-SMT) and neural MT (NMT). Our experiments show that there is a loss of lexical and syntactic richness in the translations produced by all investigated MT paradigms for two language pairs (EN-FR and EN-ES).</abstract>
    <!-- Identifiers: citation key and bare DOI (no resolver prefix). -->
    <identifier type="citekey">vanmassenhove-etal-2021-machine</identifier>
    <identifier type="doi">10.18653/v1/2021.eacl-main.188</identifier>
    <location>
      <url>https://aclanthology.org/2021.eacl-main.188</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2021-04</date>
      <extent unit="page">
        <start>2203</start>
        <end>2213</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Machine Translationese: Effects of Algorithmic Bias on Linguistic Complexity in Machine Translation
%A Vanmassenhove, Eva
%A Shterionov, Dimitar
%A Gwilliam, Matthew
%Y Merlo, Paola
%Y Tiedemann, Jörg
%Y Tsarfaty, Reut
%S Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume
%D 2021
%8 April
%I Association for Computational Linguistics
%C Online
%F vanmassenhove-etal-2021-machine
%X Recent studies in the field of Machine Translation (MT) and Natural Language Processing (NLP) have shown that existing models amplify biases observed in the training data. The amplification of biases in language technology has mainly been examined with respect to specific phenomena, such as gender bias. In this work, we go beyond the study of gender in MT and investigate how bias amplification might affect language in a broader sense. We hypothesize that the ‘algorithmic bias’, i.e. an exacerbation of frequently observed patterns in combination with a loss of less frequent ones, not only exacerbates societal biases present in current datasets but could also lead to an artificially impoverished language: ‘machine translationese’. We assess the linguistic richness (on a lexical and morphological level) of translations created by different data-driven MT paradigms – phrase-based statistical (PB-SMT) and neural MT (NMT). Our experiments show that there is a loss of lexical and syntactic richness in the translations produced by all investigated MT paradigms for two language pairs (EN-FR and EN-ES).
%R 10.18653/v1/2021.eacl-main.188
%U https://aclanthology.org/2021.eacl-main.188
%U https://doi.org/10.18653/v1/2021.eacl-main.188
%P 2203-2213
Markdown (Informal)
[Machine Translationese: Effects of Algorithmic Bias on Linguistic Complexity in Machine Translation](https://aclanthology.org/2021.eacl-main.188) (Vanmassenhove et al., EACL 2021)
ACL