% Conference paper in Findings of EACL 2026 (ACL Anthology ID 2026.findings-eacl.27).
@inproceedings{hakimov-etal-2026-price,
  title     = {The Price of Thought: A Multilingual Analysis of Reasoning, Performance, and Cost of Negotiation in Large Language Models},
  author    = {Hakimov, Sherzod and
               Bernard, Roland and
               Leiber, Tim and
               Osswald, Karl and
               Richert, Kristina and
               Yang, Ruilin and
               Bernardi, Raffaella and
               Schlangen, David},
  editor    = {Demberg, Vera and
               Inui, Kentaro and
               Marquez, Llu{\'\i}s},
  booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {EACL} 2026},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.findings-eacl.27/},
  pages     = {529--570},
  isbn      = {979-8-89176-386-9},
  abstract  = {Negotiation is a fundamental challenge for AI agents, as it requires an ability to reason strategically, model opponents, and balance cooperation with competition. We present the first comprehensive study that systematically evaluates how explicit reasoning training affects the negotiation abilities of both commercial and open-weight large language models, comparing these models to their vanilla counterparts across three languages. Using a self-play setup across three diverse dialogue games, we analyse trade-offs between performance and cost, the language consistency of reasoning processes, and the nature of strategic adaptation exhibited by models. Our findings show that enabling reasoning{---}that is, scaling test time compute{---}significantly improves negotiation outcomes by enhancing collaboration and helping models overcome task complexities, but comes at a substantial computational cost: reasoning improves GPT-5{'}s performance by 31.4 {\%} while increasing its cost by nearly 400 {\%}. Most critically, we uncover a significant multilingual reasoning distinction: open-weight models consistently switch to English for their internal reasoning steps, even when negotiating in German or Italian (and thus possibly impacting potential explainability gains through the disclosure of reasoning traces), while a leading commercial model maintains language consistency between reasoning and final output.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey hakimov-etal-2026-price. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="hakimov-etal-2026-price">
    <titleInfo>
      <title>The Price of Thought: A Multilingual Analysis of Reasoning, Performance, and Cost of Negotiation in Large Language Models</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Sherzod</namePart>
      <namePart type="family">Hakimov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Roland</namePart>
      <namePart type="family">Bernard</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tim</namePart>
      <namePart type="family">Leiber</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Karl</namePart>
      <namePart type="family">Osswald</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kristina</namePart>
      <namePart type="family">Richert</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ruilin</namePart>
      <namePart type="family">Yang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Raffaella</namePart>
      <namePart type="family">Bernardi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">David</namePart>
      <namePart type="family">Schlangen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EACL 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Vera</namePart>
        <namePart type="family">Demberg</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kentaro</namePart>
        <namePart type="family">Inui</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lluís</namePart>
        <namePart type="family">Marquez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-386-9</identifier>
    </relatedItem>
    <abstract>Negotiation is a fundamental challenge for AI agents, as it requires an ability to reason strategically, model opponents, and balance cooperation with competition. We present the first comprehensive study that systematically evaluates how explicit reasoning training affects the negotiation abilities of both commercial and open-weight large language models, comparing these models to their vanilla counterparts across three languages. Using a self-play setup across three diverse dialogue games, we analyse trade-offs between performance and cost, the language consistency of reasoning processes, and the nature of strategic adaptation exhibited by models. Our findings show that enabling reasoning—that is, scaling test time compute—significantly improves negotiation outcomes by enhancing collaboration and helping models overcome task complexities, but comes at a substantial computational cost: reasoning improves GPT-5’s performance by 31.4 % while increasing its cost by nearly 400 %. Most critically, we uncover a significant multilingual reasoning distinction: open-weight models consistently switch to English for their internal reasoning steps, even when negotiating in German or Italian (and thus possibly impacting potential explainability gains through the disclosure of reasoning traces), while a leading commercial model maintains language consistency between reasoning and final output.</abstract>
    <identifier type="citekey">hakimov-etal-2026-price</identifier>
    <location>
      <url>https://aclanthology.org/2026.findings-eacl.27/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>529</start>
        <end>570</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T The Price of Thought: A Multilingual Analysis of Reasoning, Performance, and Cost of Negotiation in Large Language Models
%A Hakimov, Sherzod
%A Bernard, Roland
%A Leiber, Tim
%A Osswald, Karl
%A Richert, Kristina
%A Yang, Ruilin
%A Bernardi, Raffaella
%A Schlangen, David
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Findings of the Association for Computational Linguistics: EACL 2026
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-386-9
%F hakimov-etal-2026-price
%X Negotiation is a fundamental challenge for AI agents, as it requires an ability to reason strategically, model opponents, and balance cooperation with competition. We present the first comprehensive study that systematically evaluates how explicit reasoning training affects the negotiation abilities of both commercial and open-weight large language models, comparing these models to their vanilla counterparts across three languages. Using a self-play setup across three diverse dialogue games, we analyse trade-offs between performance and cost, the language consistency of reasoning processes, and the nature of strategic adaptation exhibited by models. Our findings show that enabling reasoning—that is, scaling test time compute—significantly improves negotiation outcomes by enhancing collaboration and helping models overcome task complexities, but comes at a substantial computational cost: reasoning improves GPT-5’s performance by 31.4 % while increasing its cost by nearly 400 %. Most critically, we uncover a significant multilingual reasoning distinction: open-weight models consistently switch to English for their internal reasoning steps, even when negotiating in German or Italian (and thus possibly impacting potential explainability gains through the disclosure of reasoning traces), while a leading commercial model maintains language consistency between reasoning and final output.
%U https://aclanthology.org/2026.findings-eacl.27/
%P 529-570
Markdown (Informal)
[The Price of Thought: A Multilingual Analysis of Reasoning, Performance, and Cost of Negotiation in Large Language Models](https://aclanthology.org/2026.findings-eacl.27/) (Hakimov et al., Findings 2026)
ACL
- Sherzod Hakimov, Roland Bernard, Tim Leiber, Karl Osswald, Kristina Richert, Ruilin Yang, Raffaella Bernardi, and David Schlangen. 2026. The Price of Thought: A Multilingual Analysis of Reasoning, Performance, and Cost of Negotiation in Large Language Models. In Findings of the Association for Computational Linguistics: EACL 2026, pages 529–570, Rabat, Morocco. Association for Computational Linguistics.