@inproceedings{becker-etal-2026-stay,
title = "Stay Focused: Problem Drift in Multi-Agent Debate",
author = "Becker, Jonas and
Kaesberg, Lars Benedikt and
Stephan, Andreas and
Wahle, Jan Philip and
Ruas, Terry and
Gipp, Bela",
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {EACL} 2026",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-eacl.268/",
pages = "5068--5102",
ISBN = "979-8-89176-386-9",
abstract = "Multi-agent debate {--} multiple instances of large language models discussing problems in turn-based interaction {--} has shown promise for solving knowledge and reasoning tasks. However, these methods show limitations when solving complex problems that require longer reasoning chains. We analyze how multi-agent debate drifts away from the initial problem over multiple turns, thus harming task performance. We define this phenomenon as problem drift and quantify its presence across ten tasks (i.e., three generative, three knowledge, three reasoning, and one instruction-following task). We find that generative tasks drift often due to the subjectivity of the answer space (76-89{\%}), compared to high-complexity tasks (7-21{\%}). To identify the reasons, eight human experts analyze 170 multi-agent debates suffering from problem drift. We find the most common issues related to this drift are the lack of progress (35{\%} of cases), low-quality feedback (26{\%} of cases), and a lack of clarity (25{\%} of cases). We propose DRIFTJudge, an LLM-as-a-judge method, as a first baseline to detect problem drift. We also propose DRIFTPolicy, which mitigates 31{\%} of problem drift cases. Our study is a step toward understanding a key limitation of multi-agent debate, highlighting why longer debates can harm task performance and how problem drift could be addressed."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="becker-etal-2026-stay">
<titleInfo>
<title>Stay Focused: Problem Drift in Multi-Agent Debate</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jonas</namePart>
<namePart type="family">Becker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lars</namePart>
<namePart type="given">Benedikt</namePart>
<namePart type="family">Kaesberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Stephan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="given">Philip</namePart>
<namePart type="family">Wahle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Terry</namePart>
<namePart type="family">Ruas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bela</namePart>
<namePart type="family">Gipp</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Marquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-386-9</identifier>
</relatedItem>
<abstract>Multi-agent debate – multiple instances of large language models discussing problems in turn-based interaction – has shown promise for solving knowledge and reasoning tasks. However, these methods show limitations when solving complex problems that require longer reasoning chains. We analyze how multi-agent debate drifts away from the initial problem over multiple turns, thus harming task performance. We define this phenomenon as problem drift and quantify its presence across ten tasks (i.e., three generative, three knowledge, three reasoning, and one instruction-following task). We find that generative tasks drift often due to the subjectivity of the answer space (76-89%), compared to high-complexity tasks (7-21%). To identify the reasons, eight human experts analyze 170 multi-agent debates suffering from problem drift. We find the most common issues related to this drift are the lack of progress (35% of cases), low-quality feedback (26% of cases), and a lack of clarity (25% of cases). We propose DRIFTJudge, an LLM-as-a-judge method, as a first baseline to detect problem drift. We also propose DRIFTPolicy, which mitigates 31% of problem drift cases. Our study is a step toward understanding a key limitation of multi-agent debate, highlighting why longer debates can harm task performance and how problem drift could be addressed.</abstract>
<identifier type="citekey">becker-etal-2026-stay</identifier>
<location>
<url>https://aclanthology.org/2026.findings-eacl.268/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>5068</start>
<end>5102</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Stay Focused: Problem Drift in Multi-Agent Debate
%A Becker, Jonas
%A Kaesberg, Lars Benedikt
%A Stephan, Andreas
%A Wahle, Jan Philip
%A Ruas, Terry
%A Gipp, Bela
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Findings of the Association for Computational Linguistics: EACL 2026
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-386-9
%F becker-etal-2026-stay
%X Multi-agent debate – multiple instances of large language models discussing problems in turn-based interaction – has shown promise for solving knowledge and reasoning tasks. However, these methods show limitations when solving complex problems that require longer reasoning chains. We analyze how multi-agent debate drifts away from the initial problem over multiple turns, thus harming task performance. We define this phenomenon as problem drift and quantify its presence across ten tasks (i.e., three generative, three knowledge, three reasoning, and one instruction-following task). We find that generative tasks drift often due to the subjectivity of the answer space (76-89%), compared to high-complexity tasks (7-21%). To identify the reasons, eight human experts analyze 170 multi-agent debates suffering from problem drift. We find the most common issues related to this drift are the lack of progress (35% of cases), low-quality feedback (26% of cases), and a lack of clarity (25% of cases). We propose DRIFTJudge, an LLM-as-a-judge method, as a first baseline to detect problem drift. We also propose DRIFTPolicy, which mitigates 31% of problem drift cases. Our study is a step toward understanding a key limitation of multi-agent debate, highlighting why longer debates can harm task performance and how problem drift could be addressed.
%U https://aclanthology.org/2026.findings-eacl.268/
%P 5068-5102
Markdown (Informal)
[Stay Focused: Problem Drift in Multi-Agent Debate](https://aclanthology.org/2026.findings-eacl.268/) (Becker et al., Findings 2026)
ACL
- Jonas Becker, Lars Benedikt Kaesberg, Andreas Stephan, Jan Philip Wahle, Terry Ruas, and Bela Gipp. 2026. Stay Focused: Problem Drift in Multi-Agent Debate. In Findings of the Association for Computational Linguistics: EACL 2026, pages 5068–5102, Rabat, Morocco. Association for Computational Linguistics.