@inproceedings{ji-etal-2023-early,
title = "Early Exit with Disentangled Representation and Equiangular Tight Frame",
author = "Ji, Yixin and
Wang, Jikai and
Li, Juntao and
Chen, Qiang and
Chen, Wenliang and
Zhang, Min",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.889",
doi = "10.18653/v1/2023.findings-acl.889",
pages = "14128--14142",
abstract = "Dynamic early exit has demonstrated great potential in coping with the sharply increasing number of pre-trained language model parameters, which can achieve a good trade-off between performance and efficiency. The existing early exit paradigm relies on training parametrical internal classifiers at each intermediate layer to complete specific tasks. Based on the predictions of these internal classifiers, different methods are designed to decide when to exit. Under this circumstance, each intermediate layer takes on both generic language representation learning and task-specific feature extraction, which makes each intermediate layer struggle to balance two types of backward loss signals during training. To break this dilemma, we propose an adapter method to decouple the two distinct types of representation and further introduce a non-parametric simplex equiangular tight frame classifier (ETF) for improvement. Extensive experiments on monolingual and multilingual tasks demonstrate that our method gains significant improvements over strong PLM backbones and early exit methods.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ji-etal-2023-early">
<titleInfo>
<title>Early Exit with Disentangled Representation and Equiangular Tight Frame</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yixin</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jikai</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juntao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qiang</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenliang</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Dynamic early exit has demonstrated great potential in coping with the sharply increasing number of pre-trained language model parameters, which can achieve a good trade-off between performance and efficiency. The existing early exit paradigm relies on training parametrical internal classifiers at each intermediate layer to complete specific tasks. Based on the predictions of these internal classifiers, different methods are designed to decide when to exit. Under this circumstance, each intermediate layer takes on both generic language representation learning and task-specific feature extraction, which makes each intermediate layer struggle to balance two types of backward loss signals during training. To break this dilemma, we propose an adapter method to decouple the two distinct types of representation and further introduce a non-parametric simplex equiangular tight frame classifier (ETF) for improvement. Extensive experiments on monolingual and multilingual tasks demonstrate that our method gains significant improvements over strong PLM backbones and early exit methods.</abstract>
<identifier type="citekey">ji-etal-2023-early</identifier>
<identifier type="doi">10.18653/v1/2023.findings-acl.889</identifier>
<location>
<url>https://aclanthology.org/2023.findings-acl.889</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>14128</start>
<end>14142</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Early Exit with Disentangled Representation and Equiangular Tight Frame
%A Ji, Yixin
%A Wang, Jikai
%A Li, Juntao
%A Chen, Qiang
%A Chen, Wenliang
%A Zhang, Min
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F ji-etal-2023-early
%X Dynamic early exit has demonstrated great potential in coping with the sharply increasing number of pre-trained language model parameters, which can achieve a good trade-off between performance and efficiency. The existing early exit paradigm relies on training parametrical internal classifiers at each intermediate layer to complete specific tasks. Based on the predictions of these internal classifiers, different methods are designed to decide when to exit. Under this circumstance, each intermediate layer takes on both generic language representation learning and task-specific feature extraction, which makes each intermediate layer struggle to balance two types of backward loss signals during training. To break this dilemma, we propose an adapter method to decouple the two distinct types of representation and further introduce a non-parametric simplex equiangular tight frame classifier (ETF) for improvement. Extensive experiments on monolingual and multilingual tasks demonstrate that our method gains significant improvements over strong PLM backbones and early exit methods.
%R 10.18653/v1/2023.findings-acl.889
%U https://aclanthology.org/2023.findings-acl.889
%U https://doi.org/10.18653/v1/2023.findings-acl.889
%P 14128-14142
Markdown (Informal)
[Early Exit with Disentangled Representation and Equiangular Tight Frame](https://aclanthology.org/2023.findings-acl.889) (Ji et al., Findings 2023)
ACL