@inproceedings{mauser-etal-2008-automatic,
title = "Automatic Evaluation Measures for Statistical Machine Translation System Optimization",
author = "Mauser, Arne and
Hasan, Sa{\v{s}}a and
Ney, Hermann",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Tapias, Daniel",
booktitle = "Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)",
month = may,
year = "2008",
address = "Marrakech, Morocco",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2008/pdf/785_paper.pdf",
abstract = "Evaluation of machine translation (MT) output is a challenging task. In most cases, there is no single correct translation. In the extreme case, two translations of the same input can have completely different words and sentence structure while still both being perfectly valid. Large projects and competitions for MT research raised the need for reliable and efficient evaluation of MT systems. For the funding side, the obvious motivation is to measure performance and progress of research. This often results in a specific measure or metric taken as primarily evaluation criterion. Do improvements in one measure really lead to improved MT performance? How does a gain in one evaluation metric affect other measures? This paper is going to answer these questions by a number of experiments.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mauser-etal-2008-automatic">
<titleInfo>
<title>Automatic Evaluation Measures for Statistical Machine Translation System Optimization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Arne</namePart>
<namePart type="family">Mauser</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saša</namePart>
<namePart type="family">Hasan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hermann</namePart>
<namePart type="family">Ney</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2008-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marrakech, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Evaluation of machine translation (MT) output is a challenging task. In most cases, there is no single correct translation. In the extreme case, two translations of the same input can have completely different words and sentence structure while still both being perfectly valid. Large projects and competitions for MT research raised the need for reliable and efficient evaluation of MT systems. For the funding side, the obvious motivation is to measure performance and progress of research. This often results in a specific measure or metric taken as primarily evaluation criterion. Do improvements in one measure really lead to improved MT performance? How does a gain in one evaluation metric affect other measures? This paper is going to answer these questions by a number of experiments.</abstract>
<identifier type="citekey">mauser-etal-2008-automatic</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/785_paper.pdf</url>
</location>
<part>
<date>2008-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Evaluation Measures for Statistical Machine Translation System Optimization
%A Mauser, Arne
%A Hasan, Saša
%A Ney, Hermann
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Tapias, Daniel
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)
%D 2008
%8 May
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F mauser-etal-2008-automatic
%X Evaluation of machine translation (MT) output is a challenging task. In most cases, there is no single correct translation. In the extreme case, two translations of the same input can have completely different words and sentence structure while still both being perfectly valid. Large projects and competitions for MT research raised the need for reliable and efficient evaluation of MT systems. For the funding side, the obvious motivation is to measure performance and progress of research. This often results in a specific measure or metric taken as primarily evaluation criterion. Do improvements in one measure really lead to improved MT performance? How does a gain in one evaluation metric affect other measures? This paper is going to answer these questions by a number of experiments.
%U http://www.lrec-conf.org/proceedings/lrec2008/pdf/785_paper.pdf
Markdown (Informal)
[Automatic Evaluation Measures for Statistical Machine Translation System Optimization](http://www.lrec-conf.org/proceedings/lrec2008/pdf/785_paper.pdf) (Mauser et al., LREC 2008)
ACL