@inproceedings{zhou-etal-2025-alw,
title = "{ALW}: Adaptive Layer-Wise contrastive decoding enhancing reasoning ability in Large Language Models",
author = "Zhou, Yuechi and
Zhou, Chuyue and
Zhang, Jianxin and
Li, Juntao and
Zhang, Min",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.447/",
doi = "10.18653/v1/2025.findings-acl.447",
pages = "8506--8524",
ISBN = "979-8-89176-256-5",
abstract = "Large language models (LLMs) have achieved remarkable performance across various reasoning tasks. However, many LLMs still encounter challenges in reasoning, especially for LLMs with fewer parameters or insufficient pre-training data. Through our experiments, we identify that noise accumulation across layers often leads to unstable token predictions during reasoning. We find that contrasting the probability distributions across layers effectively mitigates this interference. Building on this insight, we propose Adaptive Layer-Wise contrastive decoding (ALW), a novel framework that enhances reasoning ability by dynamically disentangling noise in shallow layers from critical signals in deep layers. Extensive experiments on several reasoning benchmarks demonstrate that ALW consistently improves answer accuracy across multiple LLMs while maintaining inference efficiency. For example, we achieve a 48{\%} improvement on the Gsm8k using the LLaMA-7B model and an absolute accuracy increase of 5.2 points on the BBH evaluation benchmark with the LLaMA-65B model."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhou-etal-2025-alw">
<titleInfo>
<title>ALW: Adaptive Layer-Wise contrastive decoding enhancing reasoning ability in Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yuechi</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chuyue</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jianxin</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juntao</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Large language models (LLMs) have achieved remarkable performance across various reasoning tasks. However, many LLMs still encounter challenges in reasoning, especially for LLMs with fewer parameters or insufficient pre-training data. Through our experiments, we identify that noise accumulation across layers often leads to unstable token predictions during reasoning. We find that contrasting the probability distributions across layers effectively mitigates this interference. Building on this insight, we propose Adaptive Layer-Wise contrastive decoding (ALW), a novel framework that enhances reasoning ability by dynamically disentangling noise in shallow layers from critical signals in deep layers. Extensive experiments on several reasoning benchmarks demonstrate that ALW consistently improves answer accuracy across multiple LLMs while maintaining inference efficiency. For example, we achieve a 48% improvement on the Gsm8k using the LLaMA-7B model and an absolute accuracy increase of 5.2 points on the BBH evaluation benchmark with the LLaMA-65B model.</abstract>
<identifier type="citekey">zhou-etal-2025-alw</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.447</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.447/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>8506</start>
<end>8524</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ALW: Adaptive Layer-Wise contrastive decoding enhancing reasoning ability in Large Language Models
%A Zhou, Yuechi
%A Zhou, Chuyue
%A Zhang, Jianxin
%A Li, Juntao
%A Zhang, Min
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F zhou-etal-2025-alw
%X Large language models (LLMs) have achieved remarkable performance across various reasoning tasks. However, many LLMs still encounter challenges in reasoning, especially for LLMs with fewer parameters or insufficient pre-training data. Through our experiments, we identify that noise accumulation across layers often leads to unstable token predictions during reasoning. We find that contrasting the probability distributions across layers effectively mitigates this interference. Building on this insight, we propose Adaptive Layer-Wise contrastive decoding (ALW), a novel framework that enhances reasoning ability by dynamically disentangling noise in shallow layers from critical signals in deep layers. Extensive experiments on several reasoning benchmarks demonstrate that ALW consistently improves answer accuracy across multiple LLMs while maintaining inference efficiency. For example, we achieve a 48% improvement on the Gsm8k using the LLaMA-7B model and an absolute accuracy increase of 5.2 points on the BBH evaluation benchmark with the LLaMA-65B model.
%R 10.18653/v1/2025.findings-acl.447
%U https://aclanthology.org/2025.findings-acl.447/
%U https://doi.org/10.18653/v1/2025.findings-acl.447
%P 8506-8524
Markdown (Informal)
[ALW: Adaptive Layer-Wise contrastive decoding enhancing reasoning ability in Large Language Models](https://aclanthology.org/2025.findings-acl.447/) (Zhou et al., Findings 2025)
ACL