@comment{ACL Anthology record 2021.wmt-1.113: submission paper for the WMT 2021 shared metrics task.}
@inproceedings{takahashi-etal-2021-multilingual,
  title     = {Multilingual Machine Translation Evaluation Metrics Fine-tuned on Pseudo-Negative Examples for {WMT} 2021 Metrics Task},
  author    = {Takahashi, Kosuke and
               Ishibashi, Yoichi and
               Sudoh, Katsuhito and
               Nakamura, Satoshi},
  booktitle = {Proceedings of the Sixth Conference on Machine Translation},
  month     = nov,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.wmt-1.113},
  pages     = {1049--1052},
  abstract  = {This paper describes our submission to the WMT2021 shared metrics task. Our metric is operative to segment-level and system-level translations. Our belief toward a better metric is to detect a significant error that cannot be missed in the real practice cases of evaluation. For that reason, we used pseudo-negative examples in which attributes of some words are transferred to the reversed attribute words, and we build evaluation models to handle such serious mistakes of translations. We fine-tune a multilingual largely pre-trained model on the provided corpus of past years{'} metric task and fine-tune again further on the synthetic negative examples that are derived from the same fine-tune corpus. From the evaluation results of the WMT21{'}s development corpus, fine-tuning on the pseudo-negatives using WMT15-17 and WMT18-20 metric corpus achieved a better Pearson{'}s correlation score than the one fine-tuned without negative examples. Our submitted models, hyp+src{\_}hyp+ref and hyp+src{\_}hyp+ref.negative, are the plain model using WMT18-20 and the one additionally fine-tuned on negative samples, respectively.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for one conference paper; the host proceedings are described in relatedItem type="host". -->
  <mods ID="takahashi-etal-2021-multilingual">
    <titleInfo>
      <title>Multilingual Machine Translation Evaluation Metrics Fine-tuned on Pseudo-Negative Examples for WMT 2021 Metrics Task</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Kosuke</namePart>
      <namePart type="family">Takahashi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yoichi</namePart>
      <namePart type="family">Ishibashi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Katsuhito</namePart>
      <namePart type="family">Sudoh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Satoshi</namePart>
      <namePart type="family">Nakamura</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Sixth Conference on Machine Translation</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper describes our submission to the WMT2021 shared metrics task. Our metric is operative to segment-level and system-level translations. Our belief toward a better metric is to detect a significant error that cannot be missed in the real practice cases of evaluation. For that reason, we used pseudo-negative examples in which attributes of some words are transferred to the reversed attribute words, and we build evaluation models to handle such serious mistakes of translations. We fine-tune a multilingual largely pre-trained model on the provided corpus of past years’ metric task and fine-tune again further on the synthetic negative examples that are derived from the same fine-tune corpus. From the evaluation results of the WMT21’s development corpus, fine-tuning on the pseudo-negatives using WMT15-17 and WMT18-20 metric corpus achieved a better Pearson’s correlation score than the one fine-tuned without negative examples. Our submitted models, hyp+src_hyp+ref and hyp+src_hyp+ref.negative, are the plain model using WMT18-20 and the one additionally fine-tuned on negative samples, respectively.</abstract>
    <identifier type="citekey">takahashi-etal-2021-multilingual</identifier>
    <location>
      <url>https://aclanthology.org/2021.wmt-1.113</url>
    </location>
    <part>
      <date>2021-11</date>
      <extent unit="page">
        <start>1049</start>
        <end>1052</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Multilingual Machine Translation Evaluation Metrics Fine-tuned on Pseudo-Negative Examples for WMT 2021 Metrics Task
%A Takahashi, Kosuke
%A Ishibashi, Yoichi
%A Sudoh, Katsuhito
%A Nakamura, Satoshi
%S Proceedings of the Sixth Conference on Machine Translation
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online
%F takahashi-etal-2021-multilingual
%X This paper describes our submission to the WMT2021 shared metrics task. Our metric is operative to segment-level and system-level translations. Our belief toward a better metric is to detect a significant error that cannot be missed in the real practice cases of evaluation. For that reason, we used pseudo-negative examples in which attributes of some words are transferred to the reversed attribute words, and we build evaluation models to handle such serious mistakes of translations. We fine-tune a multilingual largely pre-trained model on the provided corpus of past years’ metric task and fine-tune again further on the synthetic negative examples that are derived from the same fine-tune corpus. From the evaluation results of the WMT21’s development corpus, fine-tuning on the pseudo-negatives using WMT15-17 and WMT18-20 metric corpus achieved a better Pearson’s correlation score than the one fine-tuned without negative examples. Our submitted models, hyp+src_hyp+ref and hyp+src_hyp+ref.negative, are the plain model using WMT18-20 and the one additionally fine-tuned on negative samples, respectively.
%U https://aclanthology.org/2021.wmt-1.113
%P 1049-1052
Markdown (Informal)
[Multilingual Machine Translation Evaluation Metrics Fine-tuned on Pseudo-Negative Examples for WMT 2021 Metrics Task](https://aclanthology.org/2021.wmt-1.113) (Takahashi et al., WMT 2021)
ACL