@inproceedings{liu-etal-2026-tracing,
title = "Tracing the Light of Thought: A Probabilistic Self- and Cross-Consistency Verification Mechanism Improving Mathematical Reasoning in {LLM}s",
author = "Liu, Xiaoyang and
Wang, Dawei and
Li, Tian and
Liang, Huizhi and
Ushaw, Gary and
Davison, Richard",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.1126/",
pages = "22444--22459",
ISBN = "979-8-89176-395-1",
abstract = "Reasoning capability is fundamental in enabling Large Language Models to perform complex multi-step inference. By sampling multiple reasoning paths and selecting the most frequent answer, Self Consistency (SC) remains highly effective but fails on challenging tasks where incorrect answers dominate the majority. Inspired by Metropolis Light Transport in physically-based rendering, where discovered high-contribution light paths guide subsequent sampling toward illumination sources, we propose Metropolis Self Consistency and its multi-LLM extension, Metropolis Cross Consistency, a probabilistic self- and cross-consistency verification framework for mathematical reasoning. Our approach employs an accept-reject mechanism to encourage high-quality reasoning paths, concentrating sampling in regions more likely to yield correct answers. Experiments on 9 LLMs across 4 challenging mathematical benchmarks demonstrate consistent improvements over SC. Even when combining models of vastly different capabilities, MCC maintains performance virtually matching the most capable model while significantly reducing computational cost compared to SC with the strongest model alone. While our implementation is training-free, adds minimal token overhead beyond SC, and requires no external reward model, our approach provides a flexible paradigm that can accommodate any scalar reward representing path correctness."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liu-etal-2026-tracing">
<titleInfo>
<title>Tracing the Light of Thought: A Probabilistic Self- and Cross-Consistency Verification Mechanism Improving Mathematical Reasoning in LLMs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xiaoyang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dawei</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tian</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Huizhi</namePart>
<namePart type="family">Liang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gary</namePart>
<namePart type="family">Ushaw</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Davison</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>Reasoning capability is fundamental in enabling Large Language Models to perform complex multi-step inference. By sampling multiple reasoning paths and selecting the most frequent answer, Self Consistency (SC) remains highly effective but fails on challenging tasks where incorrect answers dominate the majority. Inspired by Metropolis Light Transport in physically-based rendering, where discovered high-contribution light paths guide subsequent sampling toward illumination sources, we propose Metropolis Self Consistency and its multi-LLM extension, Metropolis Cross Consistency, a probabilistic self- and cross-consistency verification framework for mathematical reasoning. Our approach employs an accept-reject mechanism to encourage high-quality reasoning paths, concentrating sampling in regions more likely to yield correct answers. Experiments on 9 LLMs across 4 challenging mathematical benchmarks demonstrate consistent improvements over SC. Even when combining models of vastly different capabilities, MCC maintains performance virtually matching the most capable model while significantly reducing computational cost compared to SC with the strongest model alone. While our implementation is training-free, adds minimal token overhead beyond SC, and requires no external reward model, our approach provides a flexible paradigm that can accommodate any scalar reward representing path correctness.</abstract>
<identifier type="citekey">liu-etal-2026-tracing</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.1126/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>22444</start>
<end>22459</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Tracing the Light of Thought: A Probabilistic Self- and Cross-Consistency Verification Mechanism Improving Mathematical Reasoning in LLMs
%A Liu, Xiaoyang
%A Wang, Dawei
%A Li, Tian
%A Liang, Huizhi
%A Ushaw, Gary
%A Davison, Richard
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F liu-etal-2026-tracing
%X Reasoning capability is fundamental in enabling Large Language Models to perform complex multi-step inference. By sampling multiple reasoning paths and selecting the most frequent answer, Self Consistency (SC) remains highly effective but fails on challenging tasks where incorrect answers dominate the majority. Inspired by Metropolis Light Transport in physically-based rendering, where discovered high-contribution light paths guide subsequent sampling toward illumination sources, we propose Metropolis Self Consistency and its multi-LLM extension, Metropolis Cross Consistency, a probabilistic self- and cross-consistency verification framework for mathematical reasoning. Our approach employs an accept-reject mechanism to encourage high-quality reasoning paths, concentrating sampling in regions more likely to yield correct answers. Experiments on 9 LLMs across 4 challenging mathematical benchmarks demonstrate consistent improvements over SC. Even when combining models of vastly different capabilities, MCC maintains performance virtually matching the most capable model while significantly reducing computational cost compared to SC with the strongest model alone. While our implementation is training-free, adds minimal token overhead beyond SC, and requires no external reward model, our approach provides a flexible paradigm that can accommodate any scalar reward representing path correctness.
%U https://aclanthology.org/2026.findings-acl.1126/
%P 22444-22459
Markdown (Informal)
[Tracing the Light of Thought: A Probabilistic Self- and Cross-Consistency Verification Mechanism Improving Mathematical Reasoning in LLMs](https://aclanthology.org/2026.findings-acl.1126/) (Liu et al., Findings 2026)
ACL