@inproceedings{costa-etal-2016-building,
title = "Building a Corpus of Errors and Quality in Machine Translation: Experiments on Error Impact",
author = "Costa, {\^A}ngela and
Correia, Rui and
Coheur, Lu{\'\i}sa",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Goggi, Sara and
Grobelnik, Marko and
Maegaard, Bente and
Mariani, Joseph and
Mazo, Helene and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1044",
pages = "288--292",
abstract = "In this paper we describe a corpus of automatic translations annotated with both error type and quality. The 300 sentences that we have selected were generated by Google Translate, Systran and two in-house Machine Translation systems that use Moses technology. The errors present on the translations were annotated with an error taxonomy that divides errors in five main linguistic categories (Orthography, Lexis, Grammar, Semantics and Discourse), reflecting the language level where the error is located. After the error annotation process, we accessed the translation quality of each sentence using a four point comprehension scale from 1 to 5. Both tasks of error and quality annotation were performed by two different annotators, achieving good levels of inter-annotator agreement. The creation of this corpus allowed us to use it as training data for a translation quality classifier. We concluded on error severity by observing the outputs of two machine learning classifiers: a decision tree and a regression model.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="costa-etal-2016-building">
<titleInfo>
<title>Building a Corpus of Errors and Quality in Machine Translation: Experiments on Error Impact</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ângela</namePart>
<namePart type="family">Costa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rui</namePart>
<namePart type="family">Correia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luísa</namePart>
<namePart type="family">Coheur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="family">Grobelnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helene</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we describe a corpus of automatic translations annotated with both error type and quality. The 300 sentences that we have selected were generated by Google Translate, Systran and two in-house Machine Translation systems that use Moses technology. The errors present on the translations were annotated with an error taxonomy that divides errors in five main linguistic categories (Orthography, Lexis, Grammar, Semantics and Discourse), reflecting the language level where the error is located. After the error annotation process, we accessed the translation quality of each sentence using a four point comprehension scale from 1 to 5. Both tasks of error and quality annotation were performed by two different annotators, achieving good levels of inter-annotator agreement. The creation of this corpus allowed us to use it as training data for a translation quality classifier. We concluded on error severity by observing the outputs of two machine learning classifiers: a decision tree and a regression model.</abstract>
<identifier type="citekey">costa-etal-2016-building</identifier>
<location>
<url>https://aclanthology.org/L16-1044</url>
</location>
<part>
<date>2016-05</date>
<extent unit="page">
<start>288</start>
<end>292</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Building a Corpus of Errors and Quality in Machine Translation: Experiments on Error Impact
%A Costa, Ângela
%A Correia, Rui
%A Coheur, Luísa
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F costa-etal-2016-building
%X In this paper we describe a corpus of automatic translations annotated with both error type and quality. The 300 sentences that we have selected were generated by Google Translate, Systran and two in-house Machine Translation systems that use Moses technology. The errors present on the translations were annotated with an error taxonomy that divides errors in five main linguistic categories (Orthography, Lexis, Grammar, Semantics and Discourse), reflecting the language level where the error is located. After the error annotation process, we accessed the translation quality of each sentence using a four point comprehension scale from 1 to 5. Both tasks of error and quality annotation were performed by two different annotators, achieving good levels of inter-annotator agreement. The creation of this corpus allowed us to use it as training data for a translation quality classifier. We concluded on error severity by observing the outputs of two machine learning classifiers: a decision tree and a regression model.
%U https://aclanthology.org/L16-1044
%P 288-292
Markdown (Informal)
[Building a Corpus of Errors and Quality in Machine Translation: Experiments on Error Impact](https://aclanthology.org/L16-1044) (Costa et al., LREC 2016)
ACL