@inproceedings{guta-etal-2025-green,
title = "The Green {KNIGHT}: Green Machine Translation with Knowledge-Distilled, Narrow, Inexpensive, Greedy, Hybrid Transformers",
author = "Guta, Andreas and
Petrick, Frithjof and
Pol{\'a}k, Peter",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.316/",
pages = "5916--5931",
ISBN = "979-8-89176-335-7",
abstract = "State-of-the-art neural machine translation (NMT) models deliver high-quality translations at the expense of high inference latency and energy consumption, requiring vast GPU fleets and contributing significantly to carbon emissions. To democratize and ``green'' NMT, we introduce the Green KNIGHT, a hardware-agnostic collection of recipes to optimize translation speed and energy consumption, with only a moderate trade-off in quality. On high-resource En{\textrightarrow}De and En{\textrightarrow}Ko benchmarks, we achieve up to 117{\texttimes} CPU speedup and 98.2{\%} energy savings with 9{\%} relative BLEU decrease. On WMT 2014 En{\textrightarrow}De and En{\textrightarrow}Fr benchmarks, we obtain up to 140{\texttimes} speedup with 98.7{\%} energy savings, while staying within 10{--}12{\%} relative BLEU decrease. Our results demonstrate that efficient and environmentally conscious NMT can be realized through optimizations built on well-understood, off-the-shelf techniques with no custom low-level code required, making our approach immediately deployable in real-world translation pipelines."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="guta-etal-2025-green">
    <titleInfo>
      <title>The Green KNIGHT: Green Machine Translation with Knowledge-Distilled, Narrow, Inexpensive, Greedy, Hybrid Transformers</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Andreas</namePart>
      <namePart type="family">Guta</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Frithjof</namePart>
      <namePart type="family">Petrick</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Peter</namePart>
      <namePart type="family">Polák</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Christos</namePart>
        <namePart type="family">Christodoulopoulos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tanmoy</namePart>
        <namePart type="family">Chakraborty</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Carolyn</namePart>
        <namePart type="family">Rose</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Violet</namePart>
        <namePart type="family">Peng</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Suzhou, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-335-7</identifier>
    </relatedItem>
    <abstract>State-of-the-art neural machine translation (NMT) models deliver high-quality translations at the expense of high inference latency and energy consumption, requiring vast GPU fleets and contributing significantly to carbon emissions. To democratize and “green” NMT, we introduce the Green KNIGHT, a hardware-agnostic collection of recipes to optimize translation speed and energy consumption, with only a moderate trade-off in quality. On high-resource En→De and En→Ko benchmarks, we achieve up to 117× CPU speedup and 98.2% energy savings with 9% relative BLEU decrease. On WMT 2014 En→De and En→Fr benchmarks, we obtain up to 140× speedup with 98.7% energy savings, while staying within 10–12% relative BLEU decrease. Our results demonstrate that efficient and environmentally conscious NMT can be realized through optimizations built on well-understood, off-the-shelf techniques with no custom low-level code required, making our approach immediately deployable in real-world translation pipelines.</abstract>
    <identifier type="citekey">guta-etal-2025-green</identifier>
    <location>
      <url>https://aclanthology.org/2025.findings-emnlp.316/</url>
    </location>
    <part>
      <date>2025-11</date>
      <extent unit="page">
        <start>5916</start>
        <end>5931</end>
      </extent>
    </part>
  </mods>
</modsCollection>

%0 Conference Proceedings
%T The Green KNIGHT: Green Machine Translation with Knowledge-Distilled, Narrow, Inexpensive, Greedy, Hybrid Transformers
%A Guta, Andreas
%A Petrick, Frithjof
%A Polák, Peter
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F guta-etal-2025-green
%X State-of-the-art neural machine translation (NMT) models deliver high-quality translations at the expense of high inference latency and energy consumption, requiring vast GPU fleets and contributing significantly to carbon emissions. To democratize and “green” NMT, we introduce the Green KNIGHT, a hardware-agnostic collection of recipes to optimize translation speed and energy consumption, with only a moderate trade-off in quality. On high-resource En→De and En→Ko benchmarks, we achieve up to 117× CPU speedup and 98.2% energy savings with 9% relative BLEU decrease. On WMT 2014 En→De and En→Fr benchmarks, we obtain up to 140× speedup with 98.7% energy savings, while staying within 10–12% relative BLEU decrease. Our results demonstrate that efficient and environmentally conscious NMT can be realized through optimizations built on well-understood, off-the-shelf techniques with no custom low-level code required, making our approach immediately deployable in real-world translation pipelines.
%U https://aclanthology.org/2025.findings-emnlp.316/
%P 5916-5931

Markdown (Informal)
[The Green KNIGHT: Green Machine Translation with Knowledge-Distilled, Narrow, Inexpensive, Greedy, Hybrid Transformers](https://aclanthology.org/2025.findings-emnlp.316/) (Guta et al., Findings 2025)
ACL
Andreas Guta, Frithjof Petrick, and Peter Polák. 2025. [The Green KNIGHT: Green Machine Translation with Knowledge-Distilled, Narrow, Inexpensive, Greedy, Hybrid Transformers](https://aclanthology.org/2025.findings-emnlp.316/). In *Findings of the Association for Computational Linguistics: EMNLP 2025*, pages 5916–5931, Suzhou, China. Association for Computational Linguistics.