% EACL 2024 Student Research Workshop paper (ACL Anthology ID 2024.eacl-srw.25).
% The abstract text has its sentence-boundary spacing restored; all other values are as exported.
@inproceedings{demirag-etal-2024-benchmarking,
    title     = "Benchmarking Diffusion Models for Machine Translation",
    author    = "Demirag, Yunus and
                 Liu, Danni and
                 Niehues, Jan",
    editor    = "Falk, Neele and
                 Papi, Sara and
                 Zhang, Mike",
    booktitle = "Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop",
    month     = mar,
    year      = "2024",
    address   = "St. Julian{'}s, Malta",
    publisher = "Association for Computational Linguistics",
    url       = "https://aclanthology.org/2024.eacl-srw.25",
    pages     = "313--324",
    abstract  = "Diffusion models have recently shown great potential on many generative tasks. In this work, we explore diffusion models for machine translation (MT). We adapt two prominent diffusion-based text generation models, Diffusion-LM and DiffuSeq, to perform machine translation. As the diffusion models generate non-autoregressively (NAR), we draw parallels to NAR machine translation models. With a comparison to conventional Transformer-based translation models, as well as to the Levenshtein Transformer, an established NAR MT model, we show that the multimodality problem that limits NAR machine translation performance is also a challenge to diffusion models. We demonstrate that knowledge distillation from an autoregressive model improves the performance of diffusion-based MT. A thorough analysis on the translation quality of inputs of different lengths shows that the diffusion models struggle more on long-range dependencies than other models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="demirag-etal-2024-benchmarking">
<titleInfo>
<title>Benchmarking Diffusion Models for Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yunus</namePart>
<namePart type="family">Demirag</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Danni</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Niehues</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Neele</namePart>
<namePart type="family">Falk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Papi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julian’s, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Diffusion models have recently shown great potential on many generative tasks. In this work, we explore diffusion models for machine translation (MT). We adapt two prominent diffusion-based text generation models, Diffusion-LM and DiffuSeq, to perform machine translation. As the diffusion models generate non-autoregressively (NAR), we draw parallels to NAR machine translation models. With a comparison to conventional Transformer-based translation models, as well as to the Levenshtein Transformer, an established NAR MT model, we show that the multimodality problem that limits NAR machine translation performance is also a challenge to diffusion models. We demonstrate that knowledge distillation from an autoregressive model improves the performance of diffusion-based MT. A thorough analysis on the translation quality of inputs of different lengths shows that the diffusion models struggle more on long-range dependencies than other models.</abstract>
<identifier type="citekey">demirag-etal-2024-benchmarking</identifier>
<location>
<url>https://aclanthology.org/2024.eacl-srw.25</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>313</start>
<end>324</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Benchmarking Diffusion Models for Machine Translation
%A Demirag, Yunus
%A Liu, Danni
%A Niehues, Jan
%Y Falk, Neele
%Y Papi, Sara
%Y Zhang, Mike
%S Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F demirag-etal-2024-benchmarking
%X Diffusion models have recently shown great potential on many generative tasks. In this work, we explore diffusion models for machine translation (MT). We adapt two prominent diffusion-based text generation models, Diffusion-LM and DiffuSeq, to perform machine translation. As the diffusion models generate non-autoregressively (NAR), we draw parallels to NAR machine translation models. With a comparison to conventional Transformer-based translation models, as well as to the Levenshtein Transformer, an established NAR MT model, we show that the multimodality problem that limits NAR machine translation performance is also a challenge to diffusion models. We demonstrate that knowledge distillation from an autoregressive model improves the performance of diffusion-based MT. A thorough analysis on the translation quality of inputs of different lengths shows that the diffusion models struggle more on long-range dependencies than other models.
%U https://aclanthology.org/2024.eacl-srw.25
%P 313-324
Markdown (Informal)
[Benchmarking Diffusion Models for Machine Translation](https://aclanthology.org/2024.eacl-srw.25) (Demirag et al., EACL 2024)
ACL
- Yunus Demirag, Danni Liu, and Jan Niehues. 2024. Benchmarking Diffusion Models for Machine Translation. In Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: Student Research Workshop, pages 313–324, St. Julian’s, Malta. Association for Computational Linguistics.