@inproceedings{chen-etal-2026-decoupling,
title = "Decoupling the Effect of Chain-of-Thought Reasoning: A Human Label Variation Perspective",
author = "Chen, Beiduo and
Hu, Tiancheng and
Zhang, Caiqi and
Litschko, Robert and
Korhonen, Anna and
Plank, Barbara",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.1103/",
pages = "21931--21951",
ISBN = "979-8-89176-395-1",
abstract = "Reasoning-tuned LLMs utilizing long Chain-of-Thought (CoT) excel at single-answer tasks, yet their ability to model Human Label Variation{---}which requires capturing probabilistic ambiguity rather than resolving it{---}remains underexplored. We investigate this through systematic disentanglement experiments on distribution-based tasks, employing Cross-CoT experiments to isolate the effect of reasoning text from intrinsic model priors. We observe a distinct ``decoupled mechanism'': while CoT improves distributional alignment, final accuracy is dictated by CoT content (99{\%} variance contribution), whereas distributional ranking is governed by model priors (over 80{\%}). Step-wise analysis further shows that while CoT{'}s influence on accuracy grows monotonically during the reasoning process, distributional structure is largely determined by LLM{'}s intrinsic priors. These findings suggest that long CoT serves as a decisive LLM decision-maker for the top option but fails to function as a granular distribution calibrator for ambiguous tasks."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-etal-2026-decoupling">
<titleInfo>
<title>Decoupling the Effect of Chain-of-Thought Reasoning: A Human Label Variation Perspective</title>
</titleInfo>
<name type="personal">
<namePart type="given">Beiduo</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tiancheng</namePart>
<namePart type="family">Hu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Caiqi</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="family">Litschko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Korhonen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Plank</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>Reasoning-tuned LLMs utilizing long Chain-of-Thought (CoT) excel at single-answer tasks, yet their ability to model Human Label Variation—which requires capturing probabilistic ambiguity rather than resolving it—remains underexplored. We investigate this through systematic disentanglement experiments on distribution-based tasks, employing Cross-CoT experiments to isolate the effect of reasoning text from intrinsic model priors. We observe a distinct “decoupled mechanism”: while CoT improves distributional alignment, final accuracy is dictated by CoT content (99% variance contribution), whereas distributional ranking is governed by model priors (over 80%). Step-wise analysis further shows that while CoT’s influence on accuracy grows monotonically during the reasoning process, distributional structure is largely determined by LLM’s intrinsic priors. These findings suggest that long CoT serves as a decisive LLM decision-maker for the top option but fails to function as a granular distribution calibrator for ambiguous tasks.</abstract>
<identifier type="citekey">chen-etal-2026-decoupling</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.1103/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>21931</start>
<end>21951</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Decoupling the Effect of Chain-of-Thought Reasoning: A Human Label Variation Perspective
%A Chen, Beiduo
%A Hu, Tiancheng
%A Zhang, Caiqi
%A Litschko, Robert
%A Korhonen, Anna
%A Plank, Barbara
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F chen-etal-2026-decoupling
%X Reasoning-tuned LLMs utilizing long Chain-of-Thought (CoT) excel at single-answer tasks, yet their ability to model Human Label Variation—which requires capturing probabilistic ambiguity rather than resolving it—remains underexplored. We investigate this through systematic disentanglement experiments on distribution-based tasks, employing Cross-CoT experiments to isolate the effect of reasoning text from intrinsic model priors. We observe a distinct “decoupled mechanism”: while CoT improves distributional alignment, final accuracy is dictated by CoT content (99% variance contribution), whereas distributional ranking is governed by model priors (over 80%). Step-wise analysis further shows that while CoT’s influence on accuracy grows monotonically during the reasoning process, distributional structure is largely determined by LLM’s intrinsic priors. These findings suggest that long CoT serves as a decisive LLM decision-maker for the top option but fails to function as a granular distribution calibrator for ambiguous tasks.
%U https://aclanthology.org/2026.findings-acl.1103/
%P 21931-21951
Markdown (Informal)
[Decoupling the Effect of Chain-of-Thought Reasoning: A Human Label Variation Perspective](https://aclanthology.org/2026.findings-acl.1103/) (Chen et al., Findings 2026)
ACL