% SELENE paper, EACL 2026 Industry Track (record from aclanthology.org).
% The abstract's run-together words and the stray hyphen in "prolonged"
% have been repaired; all other field values are unchanged.
% NOTE(review): the address field appears to hold the conference venue
% rather than the publisher's city -- confirm before use with strict styles.
@inproceedings{verma-etal-2026-selene,
  title     = {{SELENE}: Selective and Evidence-Weighted {LLM} Debating for Efficient and Reliable Reasoning},
  author    = {Verma, Akshay and
               Gupta, Swapnil and
               Gupta, Deepak and
               Sircar, Prateek and
               Pillai, Siddharth},
  editor    = {Matusevych, Yevgen and
               Eryi{\u{g}}it, G{\"u}l{\c{s}}en and
               Aletras, Nikolaos},
  booktitle = {Proceedings of the 19th Conference of the {European} Chapter of the {Association} for {Computational} {Linguistics} (Volume 5: Industry Track)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.eacl-industry.7/},
  pages     = {95--104},
  isbn      = {979-8-89176-384-5},
  abstract  = {Multi-Agent Debate (MAD) frameworks improve factual reliability in large language models (LLMs) by allowing agents to critique and refine one another{'}s reasoning. Yet, existing MAD systems are computationally expensive and prone to degradation under prolonged debates due to redundant exchanges and unstable judging. We propose a lightweight, industry-deployable alternative that unifies Selective Debate Initiation (SDI) with Evidence Weighted Self-Consistency (EWSC) for adaptive, debate-on-demand reasoning. SDI dynamically predicts when debate is necessary by detecting confidence-likelihood misalignment and semantic disagreement, skipping well-aligned queries to conserve computation. EWSC replaces a single-judge verdict with a variance-aware, evidence-weighted aggregation across paraphrased evaluations, yielding more stable factual judgments. Combined, SDI and EWSC reduce token consumption by nearly 50{\%} while improving both accuracy and calibration. Evaluated on BoolQ, CosmosQA, and an internal QnA benchmark, our framework achieves higher factual robustness and efficiency, demonstrating that scalable, epistemically reliable multi-agent reasoning is practical for real-world LLM deployments.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the SELENE paper. The nested relatedItem (type="host")
       describes the proceedings volume in which the paper appears. -->
  <mods ID="verma-etal-2026-selene">
    <titleInfo>
      <title>SELENE: Selective and Evidence-Weighted LLM Debating for Efficient and Reliable Reasoning</title>
    </titleInfo>
    <!-- Paper authors, in the order given by the record. -->
    <name type="personal">
      <namePart type="given">Akshay</namePart>
      <namePart type="family">Verma</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Swapnil</namePart>
      <namePart type="family">Gupta</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Deepak</namePart>
      <namePart type="family">Gupta</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Prateek</namePart>
      <namePart type="family">Sircar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Siddharth</namePart>
      <namePart type="family">Pillai</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 5: Industry Track)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Yevgen</namePart>
        <namePart type="family">Matusevych</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Gülşen</namePart>
        <namePart type="family">Eryiğit</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nikolaos</namePart>
        <namePart type="family">Aletras</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-384-5</identifier>
    </relatedItem>
    <!-- Abstract text: run-together words and the stray hyphen in "prolonged" have been repaired. -->
    <abstract>Multi-Agent Debate (MAD) frameworks improve factual reliability in large language models (LLMs) by allowing agents to critique and refine one another’s reasoning. Yet, existing MAD systems are computationally expensive and prone to degradation under prolonged debates due to redundant exchanges and unstable judging. We propose a lightweight, industry-deployable alternative that unifies Selective Debate Initiation (SDI) with Evidence Weighted Self-Consistency (EWSC) for adaptive, debate-on-demand reasoning. SDI dynamically predicts when debate is necessary by detecting confidence-likelihood misalignment and semantic disagreement, skipping well-aligned queries to conserve computation. EWSC replaces a single-judge verdict with a variance-aware, evidence-weighted aggregation across paraphrased evaluations, yielding more stable factual judgments. Combined, SDI and EWSC reduce token consumption by nearly 50% while improving both accuracy and calibration. Evaluated on BoolQ, CosmosQA, and an internal QnA benchmark, our framework achieves higher factual robustness and efficiency, demonstrating that scalable, epistemically reliable multi-agent reasoning is practical for real-world LLM deployments.</abstract>
    <identifier type="citekey">verma-etal-2026-selene</identifier>
    <location>
      <url>https://aclanthology.org/2026.eacl-industry.7/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>95</start>
        <end>104</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T SELENE: Selective and Evidence-Weighted LLM Debating for Efficient and Reliable Reasoning
%A Verma, Akshay
%A Gupta, Swapnil
%A Gupta, Deepak
%A Sircar, Prateek
%A Pillai, Siddharth
%Y Matusevych, Yevgen
%Y Eryiğit, Gülşen
%Y Aletras, Nikolaos
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 5: Industry Track)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-384-5
%F verma-etal-2026-selene
%X Multi-Agent Debate (MAD) frameworks improve factual reliability in large language models (LLMs) by allowing agents to critique and refine one another’s reasoning. Yet, existing MAD systems are computationally expensive and prone to degradation under prolonged debates due to redundant exchanges and unstable judging. We propose a lightweight, industry-deployable alternative that unifies Selective Debate Initiation (SDI) with Evidence Weighted Self-Consistency (EWSC) for adaptive, debate-on-demand reasoning. SDI dynamically predicts when debate is necessary by detecting confidence-likelihood misalignment and semantic disagreement, skipping well-aligned queries to conserve computation. EWSC replaces a single-judge verdict with a variance-aware, evidence-weighted aggregation across paraphrased evaluations, yielding more stable factual judgments. Combined, SDI and EWSC reduce token consumption by nearly 50% while improving both accuracy and calibration. Evaluated on BoolQ, CosmosQA, and an internal QnA benchmark, our framework achieves higher factual robustness and efficiency, demonstrating that scalable, epistemically reliable multi-agent reasoning is practical for real-world LLM deployments.
%U https://aclanthology.org/2026.eacl-industry.7/
%P 95-104
Markdown (Informal)
[SELENE: Selective and Evidence-Weighted LLM Debating for Efficient and Reliable Reasoning](https://aclanthology.org/2026.eacl-industry.7/) (Verma et al., EACL 2026)
ACL