% Conference paper (LREC 2016) presenting the PROTEST pronoun-translation test suite.
@inproceedings{guillou-hardmeier-2016-protest,
  author    = {Guillou, Liane and
               Hardmeier, Christian},
  title     = {{PROTEST}: A Test Suite for Evaluating Pronouns in Machine Translation},
  editor    = {Calzolari, Nicoletta and
               Choukri, Khalid and
               Declerck, Thierry and
               Goggi, Sara and
               Grobelnik, Marko and
               Maegaard, Bente and
               Mariani, Joseph and
               Mazo, Helene and
               Moreno, Asuncion and
               Odijk, Jan and
               Piperidis, Stelios},
  booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)},
  month     = may,
  year      = {2016},
  address   = {Portoro{\v{z}}, Slovenia},
  publisher = {European Language Resources Association (ELRA)},
  pages     = {636--643},
  url       = {https://aclanthology.org/L16-1100},
  abstract  = {We present PROTEST, a test suite for the evaluation of pronoun translation by MT systems. The test suite comprises 250 hand-selected pronoun tokens and an automatic evaluation method which compares the translations of pronouns in MT output with those in the reference translation. Pronoun translations that do not match the reference are referred for manual evaluation. PROTEST is designed to support analysis of system performance at the level of individual pronoun groups, rather than to provide a single aggregate measure over all pronouns. We wish to encourage detailed analyses to highlight issues in the handling of specific linguistic mechanisms by MT systems, thereby contributing to a better understanding of those problems involved in translating pronouns. We present two use cases for PROTEST: a) for measuring improvement/degradation of an incremental system change, and b) for comparing the performance of a group of systems whose design may be largely unrelated. Following the latter use case, we demonstrate the application of PROTEST to the evaluation of the systems submitted to the DiscoMT 2015 shared task on pronoun translation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey guillou-hardmeier-2016-protest. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="guillou-hardmeier-2016-protest">
    <titleInfo>
      <title>PROTEST: A Test Suite for Evaluating Pronouns in Machine Translation</title>
    </titleInfo>
    <!-- Authors of the paper -->
    <name type="personal">
      <namePart type="given">Liane</namePart>
      <namePart type="family">Guillou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Christian</namePart>
      <namePart type="family">Hardmeier</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2016-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume, its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Khalid</namePart>
        <namePart type="family">Choukri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Thierry</namePart>
        <namePart type="family">Declerck</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sara</namePart>
        <namePart type="family">Goggi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marko</namePart>
        <namePart type="family">Grobelnik</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bente</namePart>
        <namePart type="family">Maegaard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joseph</namePart>
        <namePart type="family">Mariani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Helene</namePart>
        <namePart type="family">Mazo</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Asuncion</namePart>
        <namePart type="family">Moreno</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jan</namePart>
        <namePart type="family">Odijk</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stelios</namePart>
        <namePart type="family">Piperidis</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association (ELRA)</publisher>
        <place>
          <placeTerm type="text">Portorož, Slovenia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We present PROTEST, a test suite for the evaluation of pronoun translation by MT systems. The test suite comprises 250 hand-selected pronoun tokens and an automatic evaluation method which compares the translations of pronouns in MT output with those in the reference translation. Pronoun translations that do not match the reference are referred for manual evaluation. PROTEST is designed to support analysis of system performance at the level of individual pronoun groups, rather than to provide a single aggregate measure over all pronouns. We wish to encourage detailed analyses to highlight issues in the handling of specific linguistic mechanisms by MT systems, thereby contributing to a better understanding of those problems involved in translating pronouns. We present two use cases for PROTEST: a) for measuring improvement/degradation of an incremental system change, and b) for comparing the performance of a group of systems whose design may be largely unrelated. Following the latter use case, we demonstrate the application of PROTEST to the evaluation of the systems submitted to the DiscoMT 2015 shared task on pronoun translation.</abstract>
    <identifier type="citekey">guillou-hardmeier-2016-protest</identifier>
    <location>
      <url>https://aclanthology.org/L16-1100</url>
    </location>
    <!-- Issue date and page range of the paper -->
    <part>
      <date>2016-05</date>
      <extent unit="page">
        <start>636</start>
        <end>643</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T PROTEST: A Test Suite for Evaluating Pronouns in Machine Translation
%A Guillou, Liane
%A Hardmeier, Christian
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F guillou-hardmeier-2016-protest
%X We present PROTEST, a test suite for the evaluation of pronoun translation by MT systems. The test suite comprises 250 hand-selected pronoun tokens and an automatic evaluation method which compares the translations of pronouns in MT output with those in the reference translation. Pronoun translations that do not match the reference are referred for manual evaluation. PROTEST is designed to support analysis of system performance at the level of individual pronoun groups, rather than to provide a single aggregate measure over all pronouns. We wish to encourage detailed analyses to highlight issues in the handling of specific linguistic mechanisms by MT systems, thereby contributing to a better understanding of those problems involved in translating pronouns. We present two use cases for PROTEST: a) for measuring improvement/degradation of an incremental system change, and b) for comparing the performance of a group of systems whose design may be largely unrelated. Following the latter use case, we demonstrate the application of PROTEST to the evaluation of the systems submitted to the DiscoMT 2015 shared task on pronoun translation.
%U https://aclanthology.org/L16-1100
%P 636-643
Markdown (Informal)
[PROTEST: A Test Suite for Evaluating Pronouns in Machine Translation](https://aclanthology.org/L16-1100) (Guillou & Hardmeier, LREC 2016)
ACL