@inproceedings{elnokrashy-kocmi-2023-ebleu,
title = "e{BLEU}: Unexpectedly Good Machine Translation Evaluation Using Simple Word Embeddings",
author = "ElNokrashy, Muhammad and
Kocmi, Tom",
editor = "Koehn, Philipp and
Haddow, Barry and
Kocmi, Tom and
Monz, Christof",
booktitle = "Proceedings of the Eighth Conference on Machine Translation",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.wmt-1.61",
doi = "10.18653/v1/2023.wmt-1.61",
pages = "746--750",
abstract = "We propose eBLEU, a metric inspired by BLEU metric that uses embedding similarities instead of string matches. We introduce meaning diffusion vectors to enable matching n-grams of semantically similar words in a BLEU-like algorithm, using efficient, non-contextual word embeddings like fastText. On WMT23 data, eBLEU beats BLEU and ChrF by around 3.8{\%} system-level score, approaching BERTScore at −0.9{\%} absolute difference. In WMT22 scenarios, eBLEU outperforms f101spBLEU and ChrF in MQM by 2.2{\%}−3.6{\%}. Curiously, on MTurk evaluations, eBLEU surpasses past methods by 3.9{\%}−8.2{\%} (f200spBLEU, COMET-22). eBLEU presents an interesting middle-ground between traditional metrics and pretrained metrics.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="elnokrashy-kocmi-2023-ebleu">
<titleInfo>
<title>eBLEU: Unexpectedly Good Machine Translation Evaluation Using Simple Word Embeddings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Muhammad</namePart>
<namePart type="family">ElNokrashy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Kocmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth Conference on Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barry</namePart>
<namePart type="family">Haddow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Kocmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christof</namePart>
<namePart type="family">Monz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We propose eBLEU, a metric inspired by BLEU metric that uses embedding similarities instead of string matches. We introduce meaning diffusion vectors to enable matching n-grams of semantically similar words in a BLEU-like algorithm, using efficient, non-contextual word embeddings like fastText. On WMT23 data, eBLEU beats BLEU and ChrF by around 3.8% system-level score, approaching BERTScore at −0.9% absolute difference. In WMT22 scenarios, eBLEU outperforms f101spBLEU and ChrF in MQM by 2.2%−3.6%. Curiously, on MTurk evaluations, eBLEU surpasses past methods by 3.9%−8.2% (f200spBLEU, COMET-22). eBLEU presents an interesting middle-ground between traditional metrics and pretrained metrics.</abstract>
<identifier type="citekey">elnokrashy-kocmi-2023-ebleu</identifier>
<identifier type="doi">10.18653/v1/2023.wmt-1.61</identifier>
<location>
<url>https://aclanthology.org/2023.wmt-1.61</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>746</start>
<end>750</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T eBLEU: Unexpectedly Good Machine Translation Evaluation Using Simple Word Embeddings
%A ElNokrashy, Muhammad
%A Kocmi, Tom
%Y Koehn, Philipp
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Monz, Christof
%S Proceedings of the Eighth Conference on Machine Translation
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F elnokrashy-kocmi-2023-ebleu
%X We propose eBLEU, a metric inspired by BLEU metric that uses embedding similarities instead of string matches. We introduce meaning diffusion vectors to enable matching n-grams of semantically similar words in a BLEU-like algorithm, using efficient, non-contextual word embeddings like fastText. On WMT23 data, eBLEU beats BLEU and ChrF by around 3.8% system-level score, approaching BERTScore at −0.9% absolute difference. In WMT22 scenarios, eBLEU outperforms f101spBLEU and ChrF in MQM by 2.2%−3.6%. Curiously, on MTurk evaluations, eBLEU surpasses past methods by 3.9%−8.2% (f200spBLEU, COMET-22). eBLEU presents an interesting middle-ground between traditional metrics and pretrained metrics.
%R 10.18653/v1/2023.wmt-1.61
%U https://aclanthology.org/2023.wmt-1.61
%U https://doi.org/10.18653/v1/2023.wmt-1.61
%P 746-750
Markdown (Informal)
[eBLEU: Unexpectedly Good Machine Translation Evaluation Using Simple Word Embeddings](https://aclanthology.org/2023.wmt-1.61) (ElNokrashy & Kocmi, WMT 2023)
ACL