@inproceedings{goto-etal-2014-crowdsourcing,
title = "Crowdsourcing for Evaluating Machine Translation Quality",
author = "Goto, Shinsuke and
Lin, Donghui and
Ishida, Toru",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Loftsson, Hrafn and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
month = may,
year = "2014",
address = "Reykjavik, Iceland",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/756_Paper.pdf",
pages = "3456--3463",
abstract = "The recent popularity of machine translation has increased the demand for the evaluation of translations. However, the traditional evaluation approach, manual checking by a bilingual professional, is too expensive and too slow. In this study, we confirm the feasibility of crowdsourcing by analyzing the accuracy of crowdsourcing translation evaluations. We compare crowdsourcing scores to professional scores with regard to three metrics: translation-score, sentence-score, and system-score. A Chinese to English translation evaluation task was designed using around the NTCIR-9 PATENT parallel corpus with the goal being 5-range evaluations of adequacy and fluency. The experiment shows that the average score of crowdsource workers well matches professional evaluation results. The system-score comparison strongly indicates that crowdsourcing can be used to find the best translation system given the input of 10 source sentence.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="goto-etal-2014-crowdsourcing">
<titleInfo>
<title>Crowdsourcing for Evaluating Machine Translation Quality</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shinsuke</namePart>
<namePart type="family">Goto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Donghui</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Toru</namePart>
<namePart type="family">Ishida</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The recent popularity of machine translation has increased the demand for the evaluation of translations. However, the traditional evaluation approach, manual checking by a bilingual professional, is too expensive and too slow. In this study, we confirm the feasibility of crowdsourcing by analyzing the accuracy of crowdsourcing translation evaluations. We compare crowdsourcing scores to professional scores with regard to three metrics: translation-score, sentence-score, and system-score. A Chinese to English translation evaluation task was designed using around the NTCIR-9 PATENT parallel corpus with the goal being 5-range evaluations of adequacy and fluency. The experiment shows that the average score of crowdsource workers well matches professional evaluation results. The system-score comparison strongly indicates that crowdsourcing can be used to find the best translation system given the input of 10 source sentence.</abstract>
<identifier type="citekey">goto-etal-2014-crowdsourcing</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/756_Paper.pdf</url>
</location>
<part>
<date>2014-05</date>
<extent unit="page">
<start>3456</start>
<end>3463</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Crowdsourcing for Evaluating Machine Translation Quality
%A Goto, Shinsuke
%A Lin, Donghui
%A Ishida, Toru
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Loftsson, Hrafn
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 May
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F goto-etal-2014-crowdsourcing
%X The recent popularity of machine translation has increased the demand for the evaluation of translations. However, the traditional evaluation approach, manual checking by a bilingual professional, is too expensive and too slow. In this study, we confirm the feasibility of crowdsourcing by analyzing the accuracy of crowdsourcing translation evaluations. We compare crowdsourcing scores to professional scores with regard to three metrics: translation-score, sentence-score, and system-score. A Chinese to English translation evaluation task was designed using around the NTCIR-9 PATENT parallel corpus with the goal being 5-range evaluations of adequacy and fluency. The experiment shows that the average score of crowdsource workers well matches professional evaluation results. The system-score comparison strongly indicates that crowdsourcing can be used to find the best translation system given the input of 10 source sentence.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/756_Paper.pdf
%P 3456-3463
Markdown (Informal)
[Crowdsourcing for Evaluating Machine Translation Quality](http://www.lrec-conf.org/proceedings/lrec2014/pdf/756_Paper.pdf) (Goto et al., LREC 2014)
ACL
- Shinsuke Goto, Donghui Lin, and Toru Ishida. 2014. Crowdsourcing for Evaluating Machine Translation Quality. In Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14), pages 3456–3463, Reykjavik, Iceland. European Language Resources Association (ELRA).