@inproceedings{turian-etal-2003-evaluation,
title = "Evaluation of machine translation and its evaluation",
author = "Turian, Joseph P. and
Shen, Luke and
Melamed, I. Dan",
booktitle = "Proceedings of Machine Translation Summit IX: Papers",
month = sep # " 23-27",
year = "2003",
address = "New Orleans, USA",
url = "https://aclanthology.org/2003.mtsummit-papers.51",
abstract = "Evaluation of MT evaluation measures is limited by inconsistent human judgment data. Nonetheless, machine translation can be evaluated using the well-known measures precision, recall, and their average, the F-measure. The unigram-based F-measure has significantly higher correlation with human judgments than recently proposed alternatives. More importantly, this standard measure has an intuitive graphical interpretation, which can facilitate insight into how MT systems might be improved. The relevant software is publicly available from \url{http://nlp.cs.nyu.edu/GTM/}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="turian-etal-2003-evaluation">
<titleInfo>
<title>Evaluation of machine translation and its evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Turian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luke</namePart>
<namePart type="family">Shen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">I</namePart>
<namePart type="given">Dan</namePart>
<namePart type="family">Melamed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2003-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Machine Translation Summit IX: Papers</title>
</titleInfo>
<originInfo>
<place>
<placeTerm type="text">New Orleans, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Evaluation of MT evaluation measures is limited by inconsistent human judgment data. Nonetheless, machine translation can be evaluated using the well-known measures precision, recall, and their average, the F-measure. The unigram-based F-measure has significantly higher correlation with human judgments than recently proposed alternatives. More importantly, this standard measure has an intuitive graphical interpretation, which can facilitate insight into how MT systems might be improved. The relevant software is publicly available from http://nlp.cs.nyu.edu/GTM/.</abstract>
<identifier type="citekey">turian-etal-2003-evaluation</identifier>
<location>
<url>https://aclanthology.org/2003.mtsummit-papers.51</url>
</location>
<part>
<date>2003-09</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluation of machine translation and its evaluation
%A Turian, Joseph P.
%A Shen, Luke
%A Melamed, I. Dan
%S Proceedings of Machine Translation Summit IX: Papers
%D 2003
%8 September 23-27
%C New Orleans, USA
%F turian-etal-2003-evaluation
%X Evaluation of MT evaluation measures is limited by inconsistent human judgment data. Nonetheless, machine translation can be evaluated using the well-known measures precision, recall, and their average, the F-measure. The unigram-based F-measure has significantly higher correlation with human judgments than recently proposed alternatives. More importantly, this standard measure has an intuitive graphical interpretation, which can facilitate insight into how MT systems might be improved. The relevant software is publicly available from http://nlp.cs.nyu.edu/GTM/.
%U https://aclanthology.org/2003.mtsummit-papers.51
Markdown (Informal)
[Evaluation of machine translation and its evaluation](https://aclanthology.org/2003.mtsummit-papers.51) (Turian et al., MTSummit 2003)
ACL