@inproceedings{lo-2020-extended,
title = "Extended Study on Using Pretrained Language Models and {Y}i{S}i-1 for Machine Translation Evaluation",
author = "Lo, Chi-kiu",
booktitle = "Proceedings of the Fifth Conference on Machine Translation",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.wmt-1.99",
pages = "895--902",
abstract = "We present an extended study on using pretrained language models and YiSi-1 for machine translation evaluation. Although the recently proposed contextual embedding based metrics, YiSi-1, significantly outperform BLEU and other metrics in correlating with human judgment on translation quality, we have yet to understand the full strength of using pretrained language models for machine translation evaluation. In this paper, we study YiSi-1{'}s correlation with human translation quality judgment by varying three major attributes (which architecture; which intermediate layer; whether it is monolingual or multilingual) of the pretrained language models. Results of the study show further improvements over YiSi-1 on the WMT 2019 Metrics shared task. We also describe the pretrained language model we trained for evaluating Inuktitut machine translation output.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lo-2020-extended">
<titleInfo>
<title>Extended Study on Using Pretrained Language Models and YiSi-1 for Machine Translation Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chi-kiu</namePart>
<namePart type="family">Lo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Conference on Machine Translation</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present an extended study on using pretrained language models and YiSi-1 for machine translation evaluation. Although the recently proposed contextual embedding based metrics, YiSi-1, significantly outperform BLEU and other metrics in correlating with human judgment on translation quality, we have yet to understand the full strength of using pretrained language models for machine translation evaluation. In this paper, we study YiSi-1’s correlation with human translation quality judgment by varying three major attributes (which architecture; which intermediate layer; whether it is monolingual or multilingual) of the pretrained language models. Results of the study show further improvements over YiSi-1 on the WMT 2019 Metrics shared task. We also describe the pretrained language model we trained for evaluating Inuktitut machine translation output.</abstract>
<identifier type="citekey">lo-2020-extended</identifier>
<location>
<url>https://aclanthology.org/2020.wmt-1.99</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>895</start>
<end>902</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Extended Study on Using Pretrained Language Models and YiSi-1 for Machine Translation Evaluation
%A Lo, Chi-kiu
%S Proceedings of the Fifth Conference on Machine Translation
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F lo-2020-extended
%X We present an extended study on using pretrained language models and YiSi-1 for machine translation evaluation. Although the recently proposed contextual embedding based metrics, YiSi-1, significantly outperform BLEU and other metrics in correlating with human judgment on translation quality, we have yet to understand the full strength of using pretrained language models for machine translation evaluation. In this paper, we study YiSi-1’s correlation with human translation quality judgment by varying three major attributes (which architecture; which intermediate layer; whether it is monolingual or multilingual) of the pretrained language models. Results of the study show further improvements over YiSi-1 on the WMT 2019 Metrics shared task. We also describe the pretrained language model we trained for evaluating Inuktitut machine translation output.
%U https://aclanthology.org/2020.wmt-1.99
%P 895-902
Markdown (Informal)
[Extended Study on Using Pretrained Language Models and YiSi-1 for Machine Translation Evaluation](https://aclanthology.org/2020.wmt-1.99) (Lo, WMT 2020)
ACL