@inproceedings{rei-etal-2022-searching,
title = "Searching for {COMETINHO}: The Little Metric That Could",
author = "Rei, Ricardo and
Farinha, Ana C and
de Souza, Jos{\'e} G.C. and
Ramos, Pedro G. and
Martins, Andr{\'e} F.T. and
Coheur, Luisa and
Lavie, Alon",
booktitle = "Proceedings of the 23rd Annual Conference of the European Association for Machine Translation",
month = jun,
year = "2022",
address = "Ghent, Belgium",
publisher = "European Association for Machine Translation",
url = "https://aclanthology.org/2022.eamt-1.9",
pages = "61--70",
abstract = "In recent years, several neural fine-tuned machine translation evaluation metrics such as COMET and BLEURT have been proposed. These metrics achieve much higher correlations with human judgments than lexical overlap metrics at the cost of computational efficiency and simplicity, limiting their applications to scenarios in which one has to score thousands of translation hypothesis (e.g. scoring multiple systems or Minimum Bayes Risk decoding). In this paper, we explore optimization techniques, pruning, and knowledge distillation to create more compact and faster COMET versions. Our results show that just by optimizing the code through the use of caching and length batching we can reduce inference time between 39{\%} and 65{\%} when scoring multiple systems. Also, we show that pruning COMET can lead to a 21{\%} model reduction without affecting the model{'}s accuracy beyond 0.01 Kendall tau correlation. Furthermore, we present DISTIL-COMET a lightweight distilled version that is 80{\%} smaller and 2.128x faster while attaining a performance close to the original model and above strong baselines such as BERTSCORE and PRISM.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rei-etal-2022-searching">
<titleInfo>
<title>Searching for COMETINHO: The Little Metric That Could</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ricardo</namePart>
<namePart type="family">Rei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ana</namePart>
<namePart type="given">C</namePart>
<namePart type="family">Farinha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">José</namePart>
<namePart type="given">G.C.</namePart>
<namePart type="family">de Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pedro</namePart>
<namePart type="given">G</namePart>
<namePart type="family">Ramos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">André</namePart>
<namePart type="given">F.T.</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luisa</namePart>
<namePart type="family">Coheur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alon</namePart>
<namePart type="family">Lavie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Annual Conference of the European Association for Machine Translation</title>
</titleInfo>
<originInfo>
<publisher>European Association for Machine Translation</publisher>
<place>
<placeTerm type="text">Ghent, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In recent years, several neural fine-tuned machine translation evaluation metrics such as COMET and BLEURT have been proposed. These metrics achieve much higher correlations with human judgments than lexical overlap metrics at the cost of computational efficiency and simplicity, limiting their applications to scenarios in which one has to score thousands of translation hypotheses (e.g. scoring multiple systems or Minimum Bayes Risk decoding). In this paper, we explore optimization techniques, pruning, and knowledge distillation to create more compact and faster COMET versions. Our results show that just by optimizing the code through the use of caching and length batching we can reduce inference time between 39% and 65% when scoring multiple systems. Also, we show that pruning COMET can lead to a 21% model reduction without affecting the model’s accuracy beyond 0.01 Kendall tau correlation. Furthermore, we present DISTIL-COMET, a lightweight distilled version that is 80% smaller and 2.128x faster while attaining a performance close to the original model and above strong baselines such as BERTSCORE and PRISM.</abstract>
<identifier type="citekey">rei-etal-2022-searching</identifier>
<location>
<url>https://aclanthology.org/2022.eamt-1.9</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>61</start>
<end>70</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Searching for COMETINHO: The Little Metric That Could
%A Rei, Ricardo
%A Farinha, Ana C.
%A de Souza, José G.C.
%A Ramos, Pedro G.
%A Martins, André F.T.
%A Coheur, Luisa
%A Lavie, Alon
%S Proceedings of the 23rd Annual Conference of the European Association for Machine Translation
%D 2022
%8 June
%I European Association for Machine Translation
%C Ghent, Belgium
%F rei-etal-2022-searching
%X In recent years, several neural fine-tuned machine translation evaluation metrics such as COMET and BLEURT have been proposed. These metrics achieve much higher correlations with human judgments than lexical overlap metrics at the cost of computational efficiency and simplicity, limiting their applications to scenarios in which one has to score thousands of translation hypotheses (e.g. scoring multiple systems or Minimum Bayes Risk decoding). In this paper, we explore optimization techniques, pruning, and knowledge distillation to create more compact and faster COMET versions. Our results show that just by optimizing the code through the use of caching and length batching we can reduce inference time between 39% and 65% when scoring multiple systems. Also, we show that pruning COMET can lead to a 21% model reduction without affecting the model’s accuracy beyond 0.01 Kendall tau correlation. Furthermore, we present DISTIL-COMET, a lightweight distilled version that is 80% smaller and 2.128x faster while attaining a performance close to the original model and above strong baselines such as BERTSCORE and PRISM.
%U https://aclanthology.org/2022.eamt-1.9
%P 61-70
Markdown (Informal)
[Searching for COMETINHO: The Little Metric That Could](https://aclanthology.org/2022.eamt-1.9) (Rei et al., EAMT 2022)
ACL
- Ricardo Rei, Ana C Farinha, José G.C. de Souza, Pedro G. Ramos, André F.T. Martins, Luisa Coheur, and Alon Lavie. 2022. Searching for COMETINHO: The Little Metric That Could. In Proceedings of the 23rd Annual Conference of the European Association for Machine Translation, pages 61–70, Ghent, Belgium. European Association for Machine Translation.
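The abstract attributes a 39% to 65% inference-time reduction when scoring multiple systems to code-level optimizations, namely caching and length batching. As a rough, hedged illustration only (not the authors' implementation), the Python sketch below shows those two ideas in isolation; `length_batches`, `CachedEncoder`, and the `encode_fn` it wraps are hypothetical names introduced here for the example, not COMET's API.

```python
# Minimal sketch of length batching and embedding caching, the two
# optimizations the abstract credits for the inference-time savings.
# `encode_fn` stands in for a COMET-style sentence encoder (assumption).
from collections import OrderedDict


def length_batches(segments, batch_size):
    """Yield (indices, segments) batches grouped by similar length,
    so each mini-batch needs little padding."""
    order = sorted(range(len(segments)), key=lambda i: len(segments[i].split()))
    for start in range(0, len(order), batch_size):
        idx = order[start:start + batch_size]
        yield idx, [segments[i] for i in idx]


class CachedEncoder:
    """Memoise sentence embeddings so shared sources and references are
    encoded only once when several systems are scored on the same test set."""

    def __init__(self, encode_fn, max_size=100_000):
        self.encode_fn = encode_fn          # hypothetical sentence encoder
        self.max_size = max_size
        self._cache = OrderedDict()

    def __call__(self, sentence):
        if sentence in self._cache:
            self._cache.move_to_end(sentence)      # mark as recently used
        else:
            if len(self._cache) >= self.max_size:
                self._cache.popitem(last=False)    # evict least-recently used
            self._cache[sentence] = self.encode_fn(sentence)
        return self._cache[sentence]
```

When N systems are evaluated against the same sources and references, only the hypotheses change between runs, so cached source and reference embeddings are reused for every system, while length batching keeps padding work inside each batch small.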