@inproceedings{zheng-etal-2023-deep,
title = "Deep Equilibrium Non-Autoregressive Sequence Learning",
author = "Zheng, Zaixiang and
Zhou, Yi and
Zhou, Hao",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.747",
doi = "10.18653/v1/2023.findings-acl.747",
pages = "11763--11781",
abstract = "In this work, we argue that non-autoregressive (NAR) sequence generative models can equivalently be regarded as an iterative refinement process towards the target sequence, implying an underlying dynamical system of NAR model: z = f(z, x) → y. In such a way, the optimal prediction of a NAR model should be the equilibrium state of its dynamics if given infinitely many iterations. However, this is infeasible in practice due to limited computational and memory budgets. To this end, we propose DEQNAR to directly solve for the equilibrium state of NAR models based on deep equilibrium networks (Bai et al., 2019) with black-box root-finding solvers and back-propagate through the equilibrium point via implicit differentiation with constant memory. We conduct extensive experiments on four WMT machine translation benchmarks. Our main findings show that DEQNAR can indeed converge to a more accurate prediction and is a general-purpose framework that consistently helps yield substantial improvement for several strong NAR backbones.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zheng-etal-2023-deep">
<titleInfo>
<title>Deep Equilibrium Non-Autoregressive Sequence Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zaixiang</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hao</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this work, we argue that non-autoregressive (NAR) sequence generative models can equivalently be regarded as an iterative refinement process towards the target sequence, implying an underlying dynamical system of NAR model: z = f(z, x) → y. In such a way, the optimal prediction of a NAR model should be the equilibrium state of its dynamics if given infinitely many iterations. However, this is infeasible in practice due to limited computational and memory budgets. To this end, we propose DEQNAR to directly solve for the equilibrium state of NAR models based on deep equilibrium networks (Bai et al., 2019) with black-box root-finding solvers and back-propagate through the equilibrium point via implicit differentiation with constant memory. We conduct extensive experiments on four WMT machine translation benchmarks. Our main findings show that DEQNAR can indeed converge to a more accurate prediction and is a general-purpose framework that consistently helps yield substantial improvement for several strong NAR backbones.</abstract>
<identifier type="citekey">zheng-etal-2023-deep</identifier>
<identifier type="doi">10.18653/v1/2023.findings-acl.747</identifier>
<location>
<url>https://aclanthology.org/2023.findings-acl.747</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>11763</start>
<end>11781</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Deep Equilibrium Non-Autoregressive Sequence Learning
%A Zheng, Zaixiang
%A Zhou, Yi
%A Zhou, Hao
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F zheng-etal-2023-deep
%X In this work, we argue that non-autoregressive (NAR) sequence generative models can equivalently be regarded as an iterative refinement process towards the target sequence, implying an underlying dynamical system of NAR model: z = f(z, x) → y. In such a way, the optimal prediction of a NAR model should be the equilibrium state of its dynamics if given infinitely many iterations. However, this is infeasible in practice due to limited computational and memory budgets. To this end, we propose DEQNAR to directly solve for the equilibrium state of NAR models based on deep equilibrium networks (Bai et al., 2019) with black-box root-finding solvers and back-propagate through the equilibrium point via implicit differentiation with constant memory. We conduct extensive experiments on four WMT machine translation benchmarks. Our main findings show that DEQNAR can indeed converge to a more accurate prediction and is a general-purpose framework that consistently helps yield substantial improvement for several strong NAR backbones.
%R 10.18653/v1/2023.findings-acl.747
%U https://aclanthology.org/2023.findings-acl.747
%U https://doi.org/10.18653/v1/2023.findings-acl.747
%P 11763-11781
Markdown (Informal)
[Deep Equilibrium Non-Autoregressive Sequence Learning](https://aclanthology.org/2023.findings-acl.747) (Zheng et al., Findings 2023)
ACL