@inproceedings{wang-etal-2026-student,
title = "Student Guides Teacher: Weak-to-Strong Inference via Spectral Orthogonal Exploration",
author = "Wang, Dayu and
Yang, Jiaye and
Li, Weikang and
Liang, Jiahui and
Li, Yang and
Xia, Deguo and
Huang, Jizhou",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.acl-long.761/",
pages = "16716--16733",
ISBN = "979-8-89176-390-6",
abstract = "Large Language Models (LLMs) often suffer from ``Reasoning Collapse'' on challenging mathematical reasoning tasks, where stochastic sampling produces lexical variations of the same erroneous logic rather than genuine semantic exploration. We observe that failed reasoning traces are often associated with a low-rank bias manifold in the model{'}s hidden-state geometry, which reduces exploration toward corrective solution directions. To address this, we propose Spectral Orthogonal Exploration (SOE), a geometric inference framework under a ``Student Guides Teacher'' paradigm. Instead of using a weak auxiliary agent for imitation, SOE uses it as an orthogonal probe to introduce semantically heterogeneous reasoning signals into the teacher{'}s orthogonal complement of its dominant subspace. This intervention steers the teacher toward more diverse reasoning trajectories and improves exploration beyond standard sampling. Experiments on mathematical benchmarks show that SOE improves average accuracy by 62.4{\%} and average sampling efficiency by 113.7{\%} over baseline methods, suggesting that geometric interventions can be effective for mitigating reasoning collapse in mathematical reasoning. We further provide preliminary evidence that SOE is also effective on logic and code generation benchmarks. Code is available at https://github.com/dayuwang401/spectral-orthogonal-exploration."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2026-student">
<titleInfo>
<title>Student Guides Teacher: Weak-to-Strong Inference via Spectral Orthogonal Exploration</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dayu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiaye</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Weikang</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiahui</namePart>
<namePart type="family">Liang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deguo</namePart>
<namePart type="family">Xia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jizhou</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-390-6</identifier>
</relatedItem>
<abstract>Large Language Models (LLMs) often suffer from “Reasoning Collapse” on challenging mathematical reasoning tasks, where stochastic sampling produces lexical variations of the same erroneous logic rather than genuine semantic exploration. We observe that failed reasoning traces are often associated with a low-rank bias manifold in the model’s hidden-state geometry, which reduces exploration toward corrective solution directions. To address this, we propose Spectral Orthogonal Exploration (SOE), a geometric inference framework under a “Student Guides Teacher” paradigm. Instead of using a weak auxiliary agent for imitation, SOE uses it as an orthogonal probe to introduce semantically heterogeneous reasoning signals into the teacher’s orthogonal complement of its dominant subspace. This intervention steers the teacher toward more diverse reasoning trajectories and improves exploration beyond standard sampling. Experiments on mathematical benchmarks show that SOE improves average accuracy by 62.4% and average sampling efficiency by 113.7% over baseline methods, suggesting that geometric interventions can be effective for mitigating reasoning collapse in mathematical reasoning. We further provide preliminary evidence that SOE is also effective on logic and code generation benchmarks. Code is available at https://github.com/dayuwang401/spectral-orthogonal-exploration.</abstract>
<identifier type="citekey">wang-etal-2026-student</identifier>
<location>
<url>https://aclanthology.org/2026.acl-long.761/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>16716</start>
<end>16733</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Student Guides Teacher: Weak-to-Strong Inference via Spectral Orthogonal Exploration
%A Wang, Dayu
%A Yang, Jiaye
%A Li, Weikang
%A Liang, Jiahui
%A Li, Yang
%A Xia, Deguo
%A Huang, Jizhou
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-390-6
%F wang-etal-2026-student
%X Large Language Models (LLMs) often suffer from “Reasoning Collapse” on challenging mathematical reasoning tasks, where stochastic sampling produces lexical variations of the same erroneous logic rather than genuine semantic exploration. We observe that failed reasoning traces are often associated with a low-rank bias manifold in the model’s hidden-state geometry, which reduces exploration toward corrective solution directions. To address this, we propose Spectral Orthogonal Exploration (SOE), a geometric inference framework under a “Student Guides Teacher” paradigm. Instead of using a weak auxiliary agent for imitation, SOE uses it as an orthogonal probe to introduce semantically heterogeneous reasoning signals into the teacher’s orthogonal complement of its dominant subspace. This intervention steers the teacher toward more diverse reasoning trajectories and improves exploration beyond standard sampling. Experiments on mathematical benchmarks show that SOE improves average accuracy by 62.4% and average sampling efficiency by 113.7% over baseline methods, suggesting that geometric interventions can be effective for mitigating reasoning collapse in mathematical reasoning. We further provide preliminary evidence that SOE is also effective on logic and code generation benchmarks. Code is available at https://github.com/dayuwang401/spectral-orthogonal-exploration.
%U https://aclanthology.org/2026.acl-long.761/
%P 16716-16733
Markdown (Informal)
[Student Guides Teacher: Weak-to-Strong Inference via Spectral Orthogonal Exploration](https://aclanthology.org/2026.acl-long.761/) (Wang et al., ACL 2026)
ACL
- Dayu Wang, Jiaye Yang, Weikang Li, Jiahui Liang, Yang Li, Deguo Xia, and Jizhou Huang. 2026. Student Guides Teacher: Weak-to-Strong Inference via Spectral Orthogonal Exploration. In Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 16716–16733, San Diego, California, United States. Association for Computational Linguistics.