% ACL Anthology BibTeX record for a workshop paper in the MeLLM 2026 proceedings.
% Field values are brace-delimited; the acronym in booktitle is protected as a whole word.
@inproceedings{yang-etal-2026-thoughts,
  title     = {Do Thoughts Depth Affect Multilingual Reasoning?},
  author    = {Yang, Linjian and Wang, Xinyan and Liu, Kunpeng},
  editor    = {Huang, Kaiyu and Mo, Fengran and Chen, Pinzhen and Jiang, Meng},
  booktitle = {Proceedings of the 1st Workshop on Multilinguality in the Era of Large Language Models ({MeLLM} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.mellm-1.14/},
  pages     = {149--158},
  isbn      = {979-8-89176-430-9},
  abstract  = {Chain-of-Thought (CoT) is commonly used to improve reasoning performance in large language models. We investigate its impact in multilingual contexts by systematically constraining reasoning steps across languages with varying resource levels. This study evaluates two models on two benchmarks with seven languages, comparing constrained CoT depth against zero-shot and free-CoT baselines. We demonstrate that increasing the number of reasoning steps does not consistently improve accuracy across various languages. While high-resource and mid-resource languages remain stable, low-resource languages often experience a decline in performance as the number of reasoning steps increases. We attribute this decline to error accumulation and reasoning noise, which are amplified under deeper reasoning in low-resource languages. These findings indicate that CoT is not inherently beneficial, but its effectiveness is significantly influenced by the interaction between reasoning steps and language resource availability.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="yang-etal-2026-thoughts">
    <titleInfo>
      <title>Do Thoughts Depth Affect Multilingual Reasoning?</title>
    </titleInfo>

    <!-- Authors of the paper, in listed order. -->
    <name type="personal">
      <namePart type="given">Linjian</namePart>
      <namePart type="family">Yang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xinyan</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kunpeng</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume that contains the paper. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 1st Workshop on Multilinguality in the Era of Large Language Models (MeLLM 2026)</title>
      </titleInfo>

      <!-- Editors of the proceedings volume. -->
      <name type="personal">
        <namePart type="given">Kaiyu</namePart>
        <namePart type="family">Huang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Fengran</namePart>
        <namePart type="family">Mo</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Pinzhen</namePart>
        <namePart type="family">Chen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Meng</namePart>
        <namePart type="family">Jiang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>

      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-430-9</identifier>
    </relatedItem>

    <abstract>Chain-of-Thought (CoT) is commonly used to improve reasoning performance in large language models. We investigate its impact in multilingual contexts by systematically constraining reasoning steps across languages with varying resource levels. This study evaluates two models on two benchmarks with seven languages, comparing constrained CoT depth against zero-shot and free-CoT baselines. We demonstrate that increasing the number of reasoning steps does not consistently improve accuracy across various languages. While high-resource and mid-resource languages remain stable, low-resource languages often experience a decline in performance as the number of reasoning steps increases. We attribute this decline to error accumulation and reasoning noise, which are amplified under deeper reasoning in low-resource languages. These findings indicate that CoT is not inherently beneficial, but its effectiveness is significantly influenced by the interaction between reasoning steps and language resource availability.</abstract>
    <identifier type="citekey">yang-etal-2026-thoughts</identifier>
    <location>
      <url>https://aclanthology.org/2026.mellm-1.14/</url>
    </location>

    <!-- Position of the paper within the host volume (page range). -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>149</start>
        <end>158</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Do Thoughts Depth Affect Multilingual Reasoning?
%A Yang, Linjian
%A Wang, Xinyan
%A Liu, Kunpeng
%Y Huang, Kaiyu
%Y Mo, Fengran
%Y Chen, Pinzhen
%Y Jiang, Meng
%S Proceedings of the 1st Workshop on Multilinguality in the Era of Large Language Models (MeLLM 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, United States
%@ 979-8-89176-430-9
%F yang-etal-2026-thoughts
%X Chain-of-Thought (CoT) is commonly used to improve reasoning performance in large language models. We investigate its impact in multilingual contexts by systematically constraining reasoning steps across languages with varying resource levels. This study evaluates two models on two benchmarks with seven languages, comparing constrained CoT depth against zero-shot and free-CoT baselines. We demonstrate that increasing the number of reasoning steps does not consistently improve accuracy across various languages. While high-resource and mid-resource languages remain stable, low-resource languages often experience a decline in performance as the number of reasoning steps increases. We attribute this decline to error accumulation and reasoning noise, which are amplified under deeper reasoning in low-resource languages. These findings indicate that CoT is not inherently beneficial, but its effectiveness is significantly influenced by the interaction between reasoning steps and language resource availability.
%U https://aclanthology.org/2026.mellm-1.14/
%P 149-158
Markdown (Informal)
[Do Thoughts Depth Affect Multilingual Reasoning?](https://aclanthology.org/2026.mellm-1.14/) (Yang et al., MeLLM 2026)
ACL
- Linjian Yang, Xinyan Wang, and Kunpeng Liu. 2026. Do Thoughts Depth Affect Multilingual Reasoning? In Proceedings of the 1st Workshop on Multilinguality in the Era of Large Language Models (MeLLM 2026), pages 149–158, San Diego, United States. Association for Computational Linguistics.