@inproceedings{bajpai-hanawal-2024-ceebert,
title = "{C}ee{BERT}: Cross-Domain Inference in Early Exit {BERT}",
author = "Bajpai, Divya Jyoti and
Hanawal, Manjesh",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2024",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-acl.101",
doi = "10.18653/v1/2024.findings-acl.101",
pages = "1736--1748",
abstract = "Pre-trained Language Models (PLMs), like BERT, with self-supervision objectives exhibit remarkable performance and generalization across various tasks. However, they suffer in inference latency due to their large size. To address this issue, side branches are attached at intermediate layers, enabling early inference of samples without requiring them to pass through all layers. However, the challenge is to decide which layer to infer and exit each sample so that the accuracy and latency are balanced. Moreover, the distribution of the samples to be inferred may differ from that used for training necessitating cross-domain adaptation. We propose an online learning algorithm named Cross-Domain Inference in Early Exit BERT (CeeBERT) that dynamically determines early exits of samples based on the level of confidence at each exit point. CeeBERT learns optimal thresholds from domain-specific confidence observed at intermediate layers on the fly, eliminating the need for labeled data. Experimental results on five distinct datasets with BERT and ALBERT models demonstrate CeeBERT{'}s ability to improve latency by reducing unnecessary computations with minimal drop in performance. By adapting to the threshold values, CeeBERT can speed up the BERT/ALBERT models by $2\times$ - $3.1\times$ with minimal drop in accuracy. The anonymized source code is available at https://github.com/Div290/CeeBERT.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bajpai-hanawal-2024-ceebert">
<titleInfo>
<title>CeeBERT: Cross-Domain Inference in Early Exit BERT</title>
</titleInfo>
<name type="personal">
<namePart type="given">Divya</namePart>
<namePart type="given">Jyoti</namePart>
<namePart type="family">Bajpai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manjesh</namePart>
<namePart type="family">Hanawal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Pre-trained Language Models (PLMs), like BERT, with self-supervision objectives exhibit remarkable performance and generalization across various tasks. However, they suffer from high inference latency due to their large size. To address this issue, side branches are attached at intermediate layers, enabling early inference of samples without requiring them to pass through all layers. However, the challenge is to decide at which layer to infer and exit each sample so that accuracy and latency are balanced. Moreover, the distribution of the samples to be inferred may differ from that used for training, necessitating cross-domain adaptation. We propose an online learning algorithm named Cross-Domain Inference in Early Exit BERT (CeeBERT) that dynamically determines early exits of samples based on the level of confidence at each exit point. CeeBERT learns optimal thresholds from domain-specific confidence observed at intermediate layers on the fly, eliminating the need for labeled data. Experimental results on five distinct datasets with BERT and ALBERT models demonstrate CeeBERT’s ability to improve latency by reducing unnecessary computations with minimal drop in performance. By adapting the threshold values, CeeBERT can speed up the BERT/ALBERT models by 2×–3.1× with minimal drop in accuracy. The source code is available at https://github.com/Div290/CeeBERT.</abstract>
<identifier type="citekey">bajpai-hanawal-2024-ceebert</identifier>
<identifier type="doi">10.18653/v1/2024.findings-acl.101</identifier>
<location>
<url>https://aclanthology.org/2024.findings-acl.101</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>1736</start>
<end>1748</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CeeBERT: Cross-Domain Inference in Early Exit BERT
%A Bajpai, Divya Jyoti
%A Hanawal, Manjesh
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Findings of the Association for Computational Linguistics: ACL 2024
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F bajpai-hanawal-2024-ceebert
%X Pre-trained Language Models (PLMs), like BERT, with self-supervision objectives exhibit remarkable performance and generalization across various tasks. However, they suffer from high inference latency due to their large size. To address this issue, side branches are attached at intermediate layers, enabling early inference of samples without requiring them to pass through all layers. However, the challenge is to decide at which layer to infer and exit each sample so that accuracy and latency are balanced. Moreover, the distribution of the samples to be inferred may differ from that used for training, necessitating cross-domain adaptation. We propose an online learning algorithm named Cross-Domain Inference in Early Exit BERT (CeeBERT) that dynamically determines early exits of samples based on the level of confidence at each exit point. CeeBERT learns optimal thresholds from domain-specific confidence observed at intermediate layers on the fly, eliminating the need for labeled data. Experimental results on five distinct datasets with BERT and ALBERT models demonstrate CeeBERT’s ability to improve latency by reducing unnecessary computations with minimal drop in performance. By adapting the threshold values, CeeBERT can speed up the BERT/ALBERT models by 2×–3.1× with minimal drop in accuracy. The source code is available at https://github.com/Div290/CeeBERT.
%R 10.18653/v1/2024.findings-acl.101
%U https://aclanthology.org/2024.findings-acl.101
%U https://doi.org/10.18653/v1/2024.findings-acl.101
%P 1736-1748
Markdown (Informal)
[CeeBERT: Cross-Domain Inference in Early Exit BERT](https://aclanthology.org/2024.findings-acl.101) (Bajpai & Hanawal, Findings 2024)
ACL
Divya Jyoti Bajpai and Manjesh Hanawal. 2024. [CeeBERT: Cross-Domain Inference in Early Exit BERT](https://aclanthology.org/2024.findings-acl.101). In *Findings of the Association for Computational Linguistics: ACL 2024*, pages 1736–1748, Bangkok, Thailand. Association for Computational Linguistics.
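
For readers skimming the abstract, a minimal sketch of the confidence-thresholded early-exit rule it describes may help. This is an illustrative reconstruction, not the authors' implementation (the actual code is in the linked repository); the function name, the max-softmax confidence criterion, and the example threshold values are assumptions, and the online, label-free learning of per-layer thresholds that CeeBERT performs is not shown here.

```python
# Sketch of confidence-based early exiting at intermediate classifiers.
# A sample exits at the first exit point whose maximum softmax confidence
# clears that layer's threshold; otherwise it falls through to the last layer.
import numpy as np

def early_exit_predict(per_layer_logits, thresholds):
    """per_layer_logits: one logit vector per exit point (shallow to deep).
    thresholds: per-exit confidence thresholds (hypothetical fixed values;
    CeeBERT would instead adapt these online from observed confidences).
    Returns (predicted_class, exit_index)."""
    for exit_idx, (logits, tau) in enumerate(zip(per_layer_logits, thresholds)):
        probs = np.exp(logits - logits.max())
        probs /= probs.sum()                      # softmax over classes
        if probs.max() >= tau:                    # confident enough -> exit early
            return int(probs.argmax()), exit_idx
    # No exit was confident enough: use the final classifier.
    return int(np.argmax(per_layer_logits[-1])), len(per_layer_logits) - 1

# Toy usage with made-up logits for three exit points and three classes.
logits = [np.array([1.2, 0.9, 1.1]),
          np.array([2.5, 0.1, 0.3]),
          np.array([3.0, 0.2, 0.1])]
print(early_exit_predict(logits, thresholds=[0.9, 0.8, 0.0]))  # exits at index 1
```

The latency gain comes from skipping the remaining encoder layers once a sample exits; the accuracy/latency trade-off is governed entirely by the per-exit thresholds, which is why the paper focuses on learning them per domain.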