@inproceedings{li-etal-2025-incremental,
title = "Incremental Transformer: Efficient Encoder for Incremented Text Over {MRC} and Conversation Tasks",
author = "Li, Weisheng and
Wang, Yuechen and
Shi, Jiaxin and
Zhou, Wengang and
Tian, Qi and
Li, Houqiang",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Eugenio, Barbara Di and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-main.590/",
pages = "8819--8829",
abstract = "Some encoder inputs such as conversation histories are frequently extended with short additional inputs like new responses. However, to obtain the real-time encoding of the extended input, existing Transformer-based encoders like BERT have to encode the whole extended input again without utilizing the existing encoding of the original input, which may be prohibitively slow for real-time applications. In this paper, we introduce Incremental Transformer, an efficient encoder dedicated for faster encoding of incremented input. It takes only added input as input but attends to cached representations of original input in lower layers for better performance. By treating questions as additional inputs of a passage, Incremental Transformer can also be applied to accelerate MRC tasks. Experimental results show tiny decline in effectiveness but significant speedup against traditional full encoder across various MRC and multi-turn conversational question answering tasks. With the help from simple distillation-like auxiliary losses, Incremental Transformer achieves a speedup of 6.2x, with a mere 2.2 point accuracy reduction in comparison to RoBERTa-Large on SQuADV1.1."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2025-incremental">
<titleInfo>
<title>Incremental Transformer: Efficient Encoder for Incremented Text Over MRC and Conversation Tasks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Weisheng</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuechen</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiaxin</namePart>
<namePart type="family">Shi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wengang</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qi</namePart>
<namePart type="family">Tian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Houqiang</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="given">Di</namePart>
<namePart type="family">Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Some encoder inputs, such as conversation histories, are frequently extended with short additional inputs like new responses. However, to obtain a real-time encoding of the extended input, existing Transformer-based encoders like BERT have to encode the whole extended input again without utilizing the existing encoding of the original input, which may be prohibitively slow for real-time applications. In this paper, we introduce Incremental Transformer, an efficient encoder dedicated to faster encoding of incremented input. It takes only the added input as input but attends to cached representations of the original input in lower layers for better performance. By treating questions as additional inputs to a passage, Incremental Transformer can also be applied to accelerate MRC tasks. Experimental results show a tiny decline in effectiveness but a significant speedup over a traditional full encoder across various MRC and multi-turn conversational question answering tasks. With the help of simple distillation-like auxiliary losses, Incremental Transformer achieves a speedup of 6.2x, with a mere 2.2-point accuracy reduction compared to RoBERTa-Large on SQuADV1.1.</abstract>
<identifier type="citekey">li-etal-2025-incremental</identifier>
<location>
<url>https://aclanthology.org/2025.coling-main.590/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>8819</start>
<end>8829</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Incremental Transformer: Efficient Encoder for Incremented Text Over MRC and Conversation Tasks
%A Li, Weisheng
%A Wang, Yuechen
%A Shi, Jiaxin
%A Zhou, Wengang
%A Tian, Qi
%A Li, Houqiang
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Eugenio, Barbara Di
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F li-etal-2025-incremental
%X Some encoder inputs, such as conversation histories, are frequently extended with short additional inputs like new responses. However, to obtain a real-time encoding of the extended input, existing Transformer-based encoders like BERT have to encode the whole extended input again without utilizing the existing encoding of the original input, which may be prohibitively slow for real-time applications. In this paper, we introduce Incremental Transformer, an efficient encoder dedicated to faster encoding of incremented input. It takes only the added input as input but attends to cached representations of the original input in lower layers for better performance. By treating questions as additional inputs to a passage, Incremental Transformer can also be applied to accelerate MRC tasks. Experimental results show a tiny decline in effectiveness but a significant speedup over a traditional full encoder across various MRC and multi-turn conversational question answering tasks. With the help of simple distillation-like auxiliary losses, Incremental Transformer achieves a speedup of 6.2x, with a mere 2.2-point accuracy reduction compared to RoBERTa-Large on SQuADV1.1.
%U https://aclanthology.org/2025.coling-main.590/
%P 8819-8829
Markdown (Informal)
[Incremental Transformer: Efficient Encoder for Incremented Text Over MRC and Conversation Tasks](https://aclanthology.org/2025.coling-main.590/) (Li et al., COLING 2025)
ACL
Weisheng Li, Yuechen Wang, Jiaxin Shi, Wengang Zhou, Qi Tian, and Houqiang Li. 2025. Incremental Transformer: Efficient Encoder for Incremented Text Over MRC and Conversation Tasks. In Proceedings of the 31st International Conference on Computational Linguistics, pages 8819–8829, Abu Dhabi, UAE. Association for Computational Linguistics.
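
Informal addendum (not part of the citation record): the abstract describes an encoder that re-encodes only the newly added tokens while attending to cached representations of the original input. Below is a minimal, hedged sketch of that attention step. All names (`incremental_attention`, `cached`, `added`, the random weight matrices) are illustrative assumptions, not the authors' implementation; per the abstract, such attention to cached states is applied in the lower layers of the encoder.

```python
# Minimal sketch (assumed names, not the authors' code): single-head attention
# where only the newly added tokens act as queries, while keys and values are
# built from cached representations of the original input plus the added tokens.
import numpy as np

def softmax(x, axis=-1):
    x = x - x.max(axis=axis, keepdims=True)
    e = np.exp(x)
    return e / e.sum(axis=axis, keepdims=True)

def incremental_attention(added, cached, Wq, Wk, Wv):
    """Encode only `added` tokens; attend over cached + added keys/values."""
    context = np.concatenate([cached, added], axis=0)  # reuse the old encoding
    q = added @ Wq                                     # queries from new tokens only
    k, v = context @ Wk, context @ Wv
    scores = q @ k.T / np.sqrt(q.shape[-1])
    return softmax(scores) @ v                         # shape: (len(added), d)

rng = np.random.default_rng(0)
d = 8
cached = rng.standard_normal((20, d))  # precomputed layer output for the passage/history
added = rng.standard_normal((4, d))    # e.g. a new question or response
Wq, Wk, Wv = (rng.standard_normal((d, d)) for _ in range(3))
print(incremental_attention(added, cached, Wq, Wk, Wv).shape)  # (4, 8)
```

The point of the sketch is the cost model: only the added tokens are re-encoded, so the per-turn work scales with the length of the new text rather than the full history, which is where the reported speedup comes from.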