@inproceedings{yu-etal-2025-mitigate,
title = "Mitigate Position Bias in {LLM}s via Scaling a Single Hidden States Channel",
author = "Yu, Yijiong and
Jiang, Huiqiang and
Luo, Xufang and
Wu, Qianhui and
Lin, Chin-Yew and
Li, Dongsheng and
Yang, Yuqing and
Huang, Yongfeng and
Qiu, Lili",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.316/",
doi = "10.18653/v1/2025.findings-acl.316",
pages = "6092--6111",
ISBN = "979-8-89176-256-5",
abstract = "Long-context language models (LCLMs) can process long context, but still exhibit position bias, also known as ``lost in the middle'', which indicates placing key information in the middle of the context will significantly affect performance. To mitigating this, we first explore the micro-level manifestations of position bias, concluding that attention weights are a micro-level expression of position bias. Then we identify that, in addition to position embeddings, positional information in hidden states also contributes to position bias, and it manifests itself in specific channels of hidden states, called positional hidden states. Based on these, we propose a method to mitigate position bias by scaling positional hidden states. Experiments on NaturalQuestions Multi-document QA, KV retrieval and LongBench, using various models including RoPE models, context window-extended models, and Alibi models, demonstrate the effectiveness and generalizability of our approach. Our method can improve performance by up to 15.2{\%} in ``lost in the middle'' benchmark by modifying just one channel of hidden states. Our code is available at https://aka.ms/PositionalHidden."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yu-etal-2025-mitigate">
<titleInfo>
<title>Mitigate Position Bias in LLMs via Scaling a Single Hidden States Channel</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yijiong</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Huiqiang</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xufang</namePart>
<namePart type="family">Luo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qianhui</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chin-Yew</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dongsheng</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuqing</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yongfeng</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lili</namePart>
<namePart type="family">Qiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Long-context language models (LCLMs) can process long context, but still exhibit position bias, also known as “lost in the middle”, which indicates placing key information in the middle of the context will significantly affect performance. To mitigating this, we first explore the micro-level manifestations of position bias, concluding that attention weights are a micro-level expression of position bias. Then we identify that, in addition to position embeddings, positional information in hidden states also contributes to position bias, and it manifests itself in specific channels of hidden states, called positional hidden states. Based on these, we propose a method to mitigate position bias by scaling positional hidden states. Experiments on NaturalQuestions Multi-document QA, KV retrieval and LongBench, using various models including RoPE models, context window-extended models, and Alibi models, demonstrate the effectiveness and generalizability of our approach. Our method can improve performance by up to 15.2% in “lost in the middle” benchmark by modifying just one channel of hidden states. Our code is available at https://aka.ms/PositionalHidden.</abstract>
<identifier type="citekey">yu-etal-2025-mitigate</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.316</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.316/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>6092</start>
<end>6111</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Mitigate Position Bias in LLMs via Scaling a Single Hidden States Channel
%A Yu, Yijiong
%A Jiang, Huiqiang
%A Luo, Xufang
%A Wu, Qianhui
%A Lin, Chin-Yew
%A Li, Dongsheng
%A Yang, Yuqing
%A Huang, Yongfeng
%A Qiu, Lili
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F yu-etal-2025-mitigate
%X Long-context language models (LCLMs) can process long context, but still exhibit position bias, also known as “lost in the middle”, which indicates placing key information in the middle of the context will significantly affect performance. To mitigating this, we first explore the micro-level manifestations of position bias, concluding that attention weights are a micro-level expression of position bias. Then we identify that, in addition to position embeddings, positional information in hidden states also contributes to position bias, and it manifests itself in specific channels of hidden states, called positional hidden states. Based on these, we propose a method to mitigate position bias by scaling positional hidden states. Experiments on NaturalQuestions Multi-document QA, KV retrieval and LongBench, using various models including RoPE models, context window-extended models, and Alibi models, demonstrate the effectiveness and generalizability of our approach. Our method can improve performance by up to 15.2% in “lost in the middle” benchmark by modifying just one channel of hidden states. Our code is available at https://aka.ms/PositionalHidden.
%R 10.18653/v1/2025.findings-acl.316
%U https://aclanthology.org/2025.findings-acl.316/
%U https://doi.org/10.18653/v1/2025.findings-acl.316
%P 6092-6111
Markdown (Informal)
[Mitigate Position Bias in LLMs via Scaling a Single Hidden States Channel](https://aclanthology.org/2025.findings-acl.316/) (Yu et al., Findings 2025)
ACL
- Yijiong Yu, Huiqiang Jiang, Xufang Luo, Qianhui Wu, Chin-Yew Lin, Dongsheng Li, Yuqing Yang, Yongfeng Huang, and Lili Qiu. 2025. Mitigate Position Bias in LLMs via Scaling a Single Hidden States Channel. In Findings of the Association for Computational Linguistics: ACL 2025, pages 6092–6111, Vienna, Austria. Association for Computational Linguistics.