@inproceedings{zhan-etal-2021-difficulty,
title = "Difficulty-Aware Machine Translation Evaluation",
author = "Zhan, Runzhe and
Liu, Xuebo and
Wong, Derek F. and
Chao, Lidia S.",
booktitle = "Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)",
month = aug,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.acl-short.5",
doi = "10.18653/v1/2021.acl-short.5",
pages = "26--32",
abstract = "The high-quality translation results produced by machine translation (MT) systems still pose a huge challenge for automatic evaluation. Current MT evaluation pays the same attention to each sentence component, while the questions of real-world examinations (e.g., university examinations) have different difficulties and weightings. In this paper, we propose a novel difficulty-aware MT evaluation metric, expanding the evaluation dimension by taking translation difficulty into consideration. A translation that fails to be predicted by most MT systems will be treated as a difficult one and assigned a large weight in the final score function, and conversely. Experimental results on the WMT19 English-German Metrics shared tasks show that our proposed method outperforms commonly used MT metrics in terms of human correlation. In particular, our proposed method performs well even when all the MT systems are very competitive, which is when most existing metrics fail to distinguish between them. The source code is freely available at https://github.com/NLP2CT/Difficulty-Aware-MT-Evaluation.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhan-etal-2021-difficulty">
    <titleInfo>
        <title>Difficulty-Aware Machine Translation Evaluation</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Runzhe</namePart>
        <namePart type="family">Zhan</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Xuebo</namePart>
        <namePart type="family">Liu</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Derek</namePart>
        <namePart type="given">F</namePart>
        <namePart type="family">Wong</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Lidia</namePart>
        <namePart type="given">S</namePart>
        <namePart type="family">Chao</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2021-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)</title>
        </titleInfo>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Online</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The high-quality translation results produced by machine translation (MT) systems still pose a huge challenge for automatic evaluation. Current MT evaluation pays the same attention to each sentence component, while the questions of real-world examinations (e.g., university examinations) have different difficulties and weightings. In this paper, we propose a novel difficulty-aware MT evaluation metric, expanding the evaluation dimension by taking translation difficulty into consideration. A translation that fails to be predicted by most MT systems will be treated as a difficult one and assigned a large weight in the final score function, and vice versa. Experimental results on the WMT19 English-German Metrics shared tasks show that our proposed method outperforms commonly used MT metrics in terms of human correlation. In particular, our proposed method performs well even when all the MT systems are very competitive, which is when most existing metrics fail to distinguish between them. The source code is freely available at https://github.com/NLP2CT/Difficulty-Aware-MT-Evaluation.</abstract>
    <identifier type="citekey">zhan-etal-2021-difficulty</identifier>
    <identifier type="doi">10.18653/v1/2021.acl-short.5</identifier>
    <location>
        <url>https://aclanthology.org/2021.acl-short.5</url>
    </location>
    <part>
        <date>2021-08</date>
        <extent unit="page">
            <start>26</start>
            <end>32</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Difficulty-Aware Machine Translation Evaluation
%A Zhan, Runzhe
%A Liu, Xuebo
%A Wong, Derek F.
%A Chao, Lidia S.
%S Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)
%D 2021
%8 August
%I Association for Computational Linguistics
%C Online
%F zhan-etal-2021-difficulty
%X The high-quality translation results produced by machine translation (MT) systems still pose a huge challenge for automatic evaluation. Current MT evaluation pays the same attention to each sentence component, while the questions of real-world examinations (e.g., university examinations) have different difficulties and weightings. In this paper, we propose a novel difficulty-aware MT evaluation metric, expanding the evaluation dimension by taking translation difficulty into consideration. A translation that fails to be predicted by most MT systems will be treated as a difficult one and assigned a large weight in the final score function, and vice versa. Experimental results on the WMT19 English-German Metrics shared tasks show that our proposed method outperforms commonly used MT metrics in terms of human correlation. In particular, our proposed method performs well even when all the MT systems are very competitive, which is when most existing metrics fail to distinguish between them. The source code is freely available at https://github.com/NLP2CT/Difficulty-Aware-MT-Evaluation.
%R 10.18653/v1/2021.acl-short.5
%U https://aclanthology.org/2021.acl-short.5
%U https://doi.org/10.18653/v1/2021.acl-short.5
%P 26-32
Markdown (Informal)
[Difficulty-Aware Machine Translation Evaluation](https://aclanthology.org/2021.acl-short.5) (Zhan et al., ACL-IJCNLP 2021)
ACL
Runzhe Zhan, Xuebo Liu, Derek F. Wong, and Lidia S. Chao. 2021. Difficulty-Aware Machine Translation Evaluation. In Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 2: Short Papers), pages 26–32, Online. Association for Computational Linguistics.
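
As a supplement to the records above, here is a minimal, hypothetical sketch of the difficulty-weighting idea described in the abstract: reference content that most participating MT systems fail to reproduce is treated as difficult and up-weighted in the final score. This token-level simplification is illustrative only (all function and variable names are invented, and the real metric is not token-based); the authors' actual implementation is in the linked NLP2CT/Difficulty-Aware-MT-Evaluation repository.

# Illustrative sketch (not the authors' implementation) of difficulty-aware
# scoring: a reference token that most participating MT systems fail to
# produce is treated as "difficult" and receives a larger weight.

def difficulty_weights(reference_tokens, system_outputs):
    # Difficulty of a token = fraction of systems whose output misses it.
    tokenized = [set(out.split()) for out in system_outputs]
    n_systems = len(tokenized)
    return [sum(tok not in out for out in tokenized) / n_systems
            for tok in reference_tokens]

def difficulty_aware_score(candidate, reference, system_outputs):
    # Weighted token recall: matching a difficult token counts for more.
    ref_tokens = reference.split()
    weights = difficulty_weights(ref_tokens, system_outputs)
    cand_tokens = set(candidate.split())
    matched = sum(w for tok, w in zip(ref_tokens, weights) if tok in cand_tokens)
    total = sum(weights)
    # If every system produced every token, no token is "difficult".
    return matched / total if total > 0 else 0.0

# Toy example: three hypothetical system outputs and one reference.
systems = ["das Haus ist gross", "das Haus ist gross", "die Haus war gross"]
reference = "das Haus ist groß"
print(difficulty_aware_score("das Haus ist groß", reference, systems))   # 1.0
print(difficulty_aware_score("das Haus ist gross", reference, systems))  # 0.4

In the toy run, missing the single difficult token ("groß", which no system produced) costs far more than missing an easy one would, which is the behavior the abstract describes for distinguishing otherwise very competitive systems.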