@inproceedings{jianbang-etal-2023-adder,
title = "Adder Encoder for Pre-trained Language Model",
author = "Jianbang, Ding and
Suiyun, Zhang and
Linlin, Li",
editor = "Sun, Maosong and
Qin, Bing and
Qiu, Xipeng and
Jiang, Jing and
Han, Xianpei",
booktitle = "Proceedings of the 22nd Chinese National Conference on Computational Linguistics",
month = aug,
year = "2023",
address = "Harbin, China",
publisher = "Chinese Information Processing Society of China",
url = "https://aclanthology.org/2023.ccl-1.76/",
pages = "898--905",
language = "eng",
abstract = "{\textquotedblleft}BERT, a pre-trained language model entirely based on attention, has proven to be highly performant for many natural language understanding tasks. However, pre-trained language models (PLMs) are often computationally expensive and can hardly be implemented with limited resources. To reduce energy burden, we introduce adder operations into the Transformer encoder and propose a novel AdderBERT with powerful representation capability. Moreover, we adopt mapping-based distillation to further improve its energy efficiency with an assured performance. Empirical results demonstrate that AdderBERT6 achieves highly competitive performance against that of its teacher BERTBASE on the GLUE benchmark while obtaining a 4.9x reduction in energy consumption.{\textquotedblright}"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jianbang-etal-2023-adder">
<titleInfo>
<title>Adder Encoder for Pre-trained Language Model</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ding</namePart>
<namePart type="family">Jianbang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhang</namePart>
<namePart type="family">Suiyun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Li</namePart>
<namePart type="family">Linlin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Chinese National Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maosong</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bing</namePart>
<namePart type="family">Qin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xipeng</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xianpei</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Chinese Information Processing Society of China</publisher>
<place>
<placeTerm type="text">Harbin, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>“BERT, a pre-trained language model entirely based on attention, has proven to be highly performant for many natural language understanding tasks. However, pre-trained language models (PLMs) are often computationally expensive and can hardly be implemented with limited resources. To reduce energy burden, we introduce adder operations into the Transformer encoder and propose a novel AdderBERT with powerful representation capability. Moreover, we adopt mapping-based distillation to further improve its energy efficiency with an assured performance. Empirical results demonstrate that AdderBERT6 achieves highly competitive performance against that of its teacher BERTBASE on the GLUE benchmark while obtaining a 4.9x reduction in energy consumption.”</abstract>
<identifier type="citekey">jianbang-etal-2023-adder</identifier>
<location>
<url>https://aclanthology.org/2023.ccl-1.76/</url>
</location>
<part>
<date>2023-08</date>
<extent unit="page">
<start>898</start>
<end>905</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Adder Encoder for Pre-trained Language Model
%A Jianbang, Ding
%A Suiyun, Zhang
%A Linlin, Li
%Y Sun, Maosong
%Y Qin, Bing
%Y Qiu, Xipeng
%Y Jiang, Jing
%Y Han, Xianpei
%S Proceedings of the 22nd Chinese National Conference on Computational Linguistics
%D 2023
%8 August
%I Chinese Information Processing Society of China
%C Harbin, China
%G eng
%F jianbang-etal-2023-adder
%X “BERT, a pre-trained language model entirely based on attention, has proven to be highly performant for many natural language understanding tasks. However, pre-trained language models (PLMs) are often computationally expensive and can hardly be implemented with limited resources. To reduce energy burden, we introduce adder operations into the Transformer encoder and propose a novel AdderBERT with powerful representation capability. Moreover, we adopt mapping-based distillation to further improve its energy efficiency with an assured performance. Empirical results demonstrate that AdderBERT6 achieves highly competitive performance against that of its teacher BERTBASE on the GLUE benchmark while obtaining a 4.9x reduction in energy consumption.”
%U https://aclanthology.org/2023.ccl-1.76/
%P 898-905
Markdown (Informal)
[Adder Encoder for Pre-trained Language Model](https://aclanthology.org/2023.ccl-1.76/) (Jianbang et al., CCL 2023)
ACL
- Ding Jianbang, Zhang Suiyun, and Li Linlin. 2023. Adder Encoder for Pre-trained Language Model. In Proceedings of the 22nd Chinese National Conference on Computational Linguistics, pages 898–905, Harbin, China. Chinese Information Processing Society of China.