% BibTeX record for an ACL 2025 long paper (Anthology ID 2025.acl-long.465).
% The address field holds the conference venue, as given in the original export.
@inproceedings{qian-etal-2025-boosting,
    title     = {Boosting Long-Context Information Seeking via Query-Guided Activation Refilling},
    author    = {Qian, Hongjin and
                 Liu, Zheng and
                 Zhang, Peitian and
                 Dou, Zhicheng and
                 Lian, Defu},
    editor    = {Che, Wanxiang and
                 Nabende, Joyce and
                 Shutova, Ekaterina and
                 Pilehvar, Mohammad Taher},
    booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.acl-long.465/},
    doi       = {10.18653/v1/2025.acl-long.465},
    pages     = {9453--9464},
    isbn      = {979-8-89176-251-0},
    abstract  = {Processing long contexts poses a significant challenge for large language models (LLMs) due to their inherent context window limitations and the computational burden of extensive key-value (KV) activations, which severely impact efficiency. For information-seeking tasks, full context perception is often unnecessary, as a query{'}s information needs can dynamically range from localized details to a global perspective, depending on its complexity. However, existing methods struggle to adapt effectively to this dynamic information needs. In the paper, we propose a method for processing long-context information-seeking tasks via query-guided ACtivation REfilling (ACRE). ACRE constructs a Bi-layer KV Cache for long contexts, where the layer-1 (L1) cache compactly captures global information, and the layer-2 (L2) cache provides detailed, localized information. ACRE establishes a proxying relationship between the two caches, allowing the input query to attend to the L1 cache and dynamically refill it with relevant entries from the L2 cache. This mechanism integrates global understanding with query-specific local details, thereby enhancing answer decoding. Experiments on a variety of long-context information-seeking datasets demonstrate ACRE{'}s effectiveness, achieving significant improvements in both performance and efficiency.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="qian-etal-2025-boosting">
<titleInfo>
<title>Boosting Long-Context Information Seeking via Query-Guided Activation Refilling</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hongjin</namePart>
<namePart type="family">Qian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peitian</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhicheng</namePart>
<namePart type="family">Dou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Defu</namePart>
<namePart type="family">Lian</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>Processing long contexts poses a significant challenge for large language models (LLMs) due to their inherent context window limitations and the computational burden of extensive key-value (KV) activations, which severely impact efficiency. For information-seeking tasks, full context perception is often unnecessary, as a query’s information needs can dynamically range from localized details to a global perspective, depending on its complexity. However, existing methods struggle to adapt effectively to this dynamic information needs. In the paper, we propose a method for processing long-context information-seeking tasks via query-guided ACtivation REfilling (ACRE). ACRE constructs a Bi-layer KV Cache for long contexts, where the layer-1 (L1) cache compactly captures global information, and the layer-2 (L2) cache provides detailed, localized information. ACRE establishes a proxying relationship between the two caches, allowing the input query to attend to the L1 cache and dynamically refill it with relevant entries from the L2 cache. This mechanism integrates global understanding with query-specific local details, thereby enhancing answer decoding. Experiments on a variety of long-context information-seeking datasets demonstrate ACRE’s effectiveness, achieving significant improvements in both performance and efficiency.</abstract>
<identifier type="citekey">qian-etal-2025-boosting</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.465</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.465/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>9453</start>
<end>9464</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Boosting Long-Context Information Seeking via Query-Guided Activation Refilling
%A Qian, Hongjin
%A Liu, Zheng
%A Zhang, Peitian
%A Dou, Zhicheng
%A Lian, Defu
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F qian-etal-2025-boosting
%X Processing long contexts poses a significant challenge for large language models (LLMs) due to their inherent context window limitations and the computational burden of extensive key-value (KV) activations, which severely impact efficiency. For information-seeking tasks, full context perception is often unnecessary, as a query’s information needs can dynamically range from localized details to a global perspective, depending on its complexity. However, existing methods struggle to adapt effectively to this dynamic information needs. In the paper, we propose a method for processing long-context information-seeking tasks via query-guided ACtivation REfilling (ACRE). ACRE constructs a Bi-layer KV Cache for long contexts, where the layer-1 (L1) cache compactly captures global information, and the layer-2 (L2) cache provides detailed, localized information. ACRE establishes a proxying relationship between the two caches, allowing the input query to attend to the L1 cache and dynamically refill it with relevant entries from the L2 cache. This mechanism integrates global understanding with query-specific local details, thereby enhancing answer decoding. Experiments on a variety of long-context information-seeking datasets demonstrate ACRE’s effectiveness, achieving significant improvements in both performance and efficiency.
%R 10.18653/v1/2025.acl-long.465
%U https://aclanthology.org/2025.acl-long.465/
%U https://doi.org/10.18653/v1/2025.acl-long.465
%P 9453-9464
Markdown (Informal)
[Boosting Long-Context Information Seeking via Query-Guided Activation Refilling](https://aclanthology.org/2025.acl-long.465/) (Qian et al., ACL 2025)
ACL