@inproceedings{aranberri-2024-analysis,
title = "Analysis of the Annotations from a Crowd {MT} Evaluation Initiative: Case Study for the {S}panish-{B}asque Pair",
author = "Aranberri, Nora",
editor = "Scarton, Carolina and
Prescott, Charlotte and
Bayliss, Chris and
Oakley, Chris and
Wright, Joanna and
Wrigley, Stuart and
Song, Xingyi and
Gow-Smith, Edward and
Bawden, Rachel and
S{\'a}nchez-Cartagena, V{\'\i}ctor M and
Cadwell, Patrick and
Lapshinova-Koltunski, Ekaterina and
Cabarr{\~a}o, Vera and
Chatzitheodorou, Konstantinos and
Nurminen, Mary and
Kanojia, Diptesh and
Moniz, Helena",
booktitle = "Proceedings of the 25th Annual Conference of the European Association for Machine Translation (Volume 1)",
month = jun,
year = "2024",
address = "Sheffield, UK",
publisher = "European Association for Machine Translation (EAMT)",
url = "https://aclanthology.org/2024.eamt-1.44",
pages = "548--559",
abstract = "With the advent and success of trainable automatic evaluation metrics, creating annotated machine translation evaluation data sets is increasingly relevant. However, for low-resource languages, gathering such data can be challenging and further insights into evaluation design for opportunistic scenarios are necessary. In this work we explore an evaluation initiative that targets the Spanish-Basque language pair to study the impact of design decisions and the reliability of volunteer contributions. To do that, we compare the work carried out by volunteers and a translation professional in terms of evaluation results and evaluator agreement and examine the control measures used to ensure reliability. Results show similar behaviour regarding general quality assessment but underscore the need for more informative working environments to make evaluation processes more reliable as well as the need for carefully crafted control cases.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="aranberri-2024-analysis">
<titleInfo>
<title>Analysis of the Annotations from a Crowd MT Evaluation Initiative: Case Study for the Spanish-Basque Pair</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nora</namePart>
<namePart type="family">Aranberri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 25th Annual Conference of the European Association for Machine Translation (Volume 1)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Carolina</namePart>
<namePart type="family">Scarton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Charlotte</namePart>
<namePart type="family">Prescott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Bayliss</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Oakley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joanna</namePart>
<namePart type="family">Wright</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stuart</namePart>
<namePart type="family">Wrigley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xingyi</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Edward</namePart>
<namePart type="family">Gow-Smith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachel</namePart>
<namePart type="family">Bawden</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Víctor</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Sánchez-Cartagena</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patrick</namePart>
<namePart type="family">Cadwell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Lapshinova-Koltunski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Cabarrão</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Konstantinos</namePart>
<namePart type="family">Chatzitheodorou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mary</namePart>
<namePart type="family">Nurminen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diptesh</namePart>
<namePart type="family">Kanojia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Moniz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Association for Machine Translation (EAMT)</publisher>
<place>
<placeTerm type="text">Sheffield, UK</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>With the advent and success of trainable automatic evaluation metrics, creating annotated machine translation evaluation data sets is increasingly relevant. However, for low-resource languages, gathering such data can be challenging and further insights into evaluation design for opportunistic scenarios are necessary. In this work we explore an evaluation initiative that targets the Spanish-Basque language pair to study the impact of design decisions and the reliability of volunteer contributions. To do that, we compare the work carried out by volunteers and a translation professional in terms of evaluation results and evaluator agreement and examine the control measures used to ensure reliability. Results show similar behaviour regarding general quality assessment but underscore the need for more informative working environments to make evaluation processes more reliable as well as the need for carefully crafted control cases.</abstract>
<identifier type="citekey">aranberri-2024-analysis</identifier>
<location>
<url>https://aclanthology.org/2024.eamt-1.44</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>548</start>
<end>559</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Analysis of the Annotations from a Crowd MT Evaluation Initiative: Case Study for the Spanish-Basque Pair
%A Aranberri, Nora
%Y Scarton, Carolina
%Y Prescott, Charlotte
%Y Bayliss, Chris
%Y Oakley, Chris
%Y Wright, Joanna
%Y Wrigley, Stuart
%Y Song, Xingyi
%Y Gow-Smith, Edward
%Y Bawden, Rachel
%Y Sánchez-Cartagena, Víctor M.
%Y Cadwell, Patrick
%Y Lapshinova-Koltunski, Ekaterina
%Y Cabarrão, Vera
%Y Chatzitheodorou, Konstantinos
%Y Nurminen, Mary
%Y Kanojia, Diptesh
%Y Moniz, Helena
%S Proceedings of the 25th Annual Conference of the European Association for Machine Translation (Volume 1)
%D 2024
%8 June
%I European Association for Machine Translation (EAMT)
%C Sheffield, UK
%F aranberri-2024-analysis
%X With the advent and success of trainable automatic evaluation metrics, creating annotated machine translation evaluation data sets is increasingly relevant. However, for low-resource languages, gathering such data can be challenging and further insights into evaluation design for opportunistic scenarios are necessary. In this work we explore an evaluation initiative that targets the Spanish-Basque language pair to study the impact of design decisions and the reliability of volunteer contributions. To do that, we compare the work carried out by volunteers and a translation professional in terms of evaluation results and evaluator agreement and examine the control measures used to ensure reliability. Results show similar behaviour regarding general quality assessment but underscore the need for more informative working environments to make evaluation processes more reliable as well as the need for carefully crafted control cases.
%U https://aclanthology.org/2024.eamt-1.44
%P 548-559
Markdown (Informal)
[Analysis of the Annotations from a Crowd MT Evaluation Initiative: Case Study for the Spanish-Basque Pair](https://aclanthology.org/2024.eamt-1.44) (Aranberri, EAMT 2024)
ACL