@inproceedings{bentivogli-etal-2018-machine,
title = "Machine Translation Human Evaluation: an investigation of evaluation based on Post-Editing and its relation with Direct Assessment",
author = "Bentivogli, Luisa and
Cettolo, Mauro and
Federico, Marcello and
Federmann, Christian",
editor = "Turchi, Marco and
Niehues, Jan and
Frederico, Marcello",
booktitle = "Proceedings of the 15th International Conference on Spoken Language Translation",
month = oct # " 29-30",
year = "2018",
address = "Brussels",
publisher = "International Conference on Spoken Language Translation",
url = "https://aclanthology.org/2018.iwslt-1.9/",
pages = "62--69",
abstract = "In this paper we present an analysis of the two most prominent methodologies used for the human evaluation of MT quality, namely evaluation based on Post-Editing (PE) and evaluation based on Direct Assessment (DA). To this purpose, we exploit a publicly available large dataset containing both types of evaluations. We first focus on PE and investigate how sensitive TER-based evaluation is to the type and number of references used. Then, we carry out a comparative analysis of PE and DA to investigate the extent to which the evaluation results obtained by methodologies addressing different human perspectives are similar. This comparison sheds light not only on PE but also on the so-called reference bias related to monolingual DA. Also, we analyze if and how the two methodologies can complement each other`s weaknesses."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bentivogli-etal-2018-machine">
<titleInfo>
<title>Machine Translation Human Evaluation: an investigation of evaluation based on Post-Editing and its relation with Direct Assessment</title>
</titleInfo>
<name type="personal">
<namePart type="given">Luisa</namePart>
<namePart type="family">Bentivogli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mauro</namePart>
<namePart type="family">Cettolo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Federico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Federmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-oct 29-30</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th International Conference on Spoken Language Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Turchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Niehues</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcello</namePart>
<namePart type="family">Frederico</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Conference on Spoken Language Translation</publisher>
<place>
<placeTerm type="text">Brussels</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we present an analysis of the two most prominent methodologies used for the human evaluation of MT quality, namely evaluation based on Post-Editing (PE) and evaluation based on Direct Assessment (DA). To this purpose, we exploit a publicly available large dataset containing both types of evaluations. We first focus on PE and investigate how sensitive TER-based evaluation is to the type and number of references used. Then, we carry out a comparative analysis of PE and DA to investigate the extent to which the evaluation results obtained by methodologies addressing different human perspectives are similar. This comparison sheds light not only on PE but also on the so-called reference bias related to monolingual DA. Also, we analyze if and how the two methodologies can complement each other‘s weaknesses.</abstract>
<identifier type="citekey">bentivogli-etal-2018-machine</identifier>
<location>
<url>https://aclanthology.org/2018.iwslt-1.9/</url>
</location>
<part>
<date>2018-oct 29-30</date>
<extent unit="page">
<start>62</start>
<end>69</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Machine Translation Human Evaluation: an investigation of evaluation based on Post-Editing and its relation with Direct Assessment
%A Bentivogli, Luisa
%A Cettolo, Mauro
%A Federico, Marcello
%A Federmann, Christian
%Y Turchi, Marco
%Y Niehues, Jan
%Y Frederico, Marcello
%S Proceedings of the 15th International Conference on Spoken Language Translation
%D 2018
%8 oct 29 30
%I International Conference on Spoken Language Translation
%C Brussels
%F bentivogli-etal-2018-machine
%X In this paper we present an analysis of the two most prominent methodologies used for the human evaluation of MT quality, namely evaluation based on Post-Editing (PE) and evaluation based on Direct Assessment (DA). To this purpose, we exploit a publicly available large dataset containing both types of evaluations. We first focus on PE and investigate how sensitive TER-based evaluation is to the type and number of references used. Then, we carry out a comparative analysis of PE and DA to investigate the extent to which the evaluation results obtained by methodologies addressing different human perspectives are similar. This comparison sheds light not only on PE but also on the so-called reference bias related to monolingual DA. Also, we analyze if and how the two methodologies can complement each other‘s weaknesses.
%U https://aclanthology.org/2018.iwslt-1.9/
%P 62-69
Markdown (Informal)
[Machine Translation Human Evaluation: an investigation of evaluation based on Post-Editing and its relation with Direct Assessment](https://aclanthology.org/2018.iwslt-1.9/) (Bentivogli et al., IWSLT 2018)
ACL