@inproceedings{shukla-etal-2025-recon,
title = "Recon, Answer, Verify: Agents in Search of Truth",
author = "Shukla, Satyam and
Dutta, Himanshu and
Bhattacharyya, Pushpak",
editor = "Potdar, Saloni and
Rojas-Barahona, Lina and
Montella, Sebastien",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track",
month = nov,
year = "2025",
address = "Suzhou (China)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-industry.167/",
pages = "2429--2448",
ISBN = "979-8-89176-333-3",
abstract = "Human fact-checking is too slow to meet current demands, making automatic fact-checking system an essential alternative. Evaluating such systems is challenging as existing benchmark datasets either suffer from leakage or evidence incompleteness. This limits the realism of current evaluations. We present $\textbf{Politi-Fact-Only (PFO)}$, a 5-class benchmark dataset of 2,982 political claims from politifact.com, where all post-claim analysis and annotator cues have been removed manually from evidence article. After filtration, evidence contains information available prior to the claim{'}s verification. By evaluating PFO, we see an average performance drop of $\textbf{11.39\%}$ in terms of macro-f1 compared to PFO{'}s unfiltered version. Based on the identified challenges of the existing LLM-based fact-checking system, we propose $\textbf{RAV (Recon-Answer-Verify)}$, an agentic framework with three agents, it iteratively generates and answers sub-questions to verify different aspects of the claim before finally generating the label. Unlike prior literature, we worked on reducing the follow-up question complexity by leveraging two 2 types of structured questions, which either validate a fact or inquire about a fact. RAV generalizes across both domains and label granularities, outperforming state-of-the-art methods by $\textbf{57.5\%}$ on PFO $\textit{(political, 5-class)}$ and by $\textbf{3.05\%}$ on the widely used HOVER dataset $\textit{(encyclopedic, 2-class)}$."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="shukla-etal-2025-recon">
<titleInfo>
<title>Recon, Answer, Verify: Agents in Search of Truth</title>
</titleInfo>
<name type="personal">
<namePart type="given">Satyam</namePart>
<namePart type="family">Shukla</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Himanshu</namePart>
<namePart type="family">Dutta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saloni</namePart>
<namePart type="family">Potdar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lina</namePart>
<namePart type="family">Rojas-Barahona</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastien</namePart>
<namePart type="family">Montella</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou (China)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-333-3</identifier>
</relatedItem>
<abstract>Human fact-checking is too slow to meet current demands, making automatic fact-checking system an essential alternative. Evaluating such systems is challenging as existing benchmark datasets either suffer from leakage or evidence incompleteness. This limits the realism of current evaluations. We present Politi-Fact-Only (PFO), a 5-class benchmark dataset of 2,982 political claims from politifact.com, where all post-claim analysis and annotator cues have been removed manually from evidence article. After filtration, evidence contains information available prior to the claim’s verification. By evaluating PFO, we see an average performance drop of 11.39% in terms of macro-f1 compared to PFO’s unfiltered version. Based on the identified challenges of the existing LLM-based fact-checking system, we propose RAV (Recon-Answer-Verify), an agentic framework with three agents, it iteratively generates and answers sub-questions to verify different aspects of the claim before finally generating the label. Unlike prior literature, we worked on reducing the follow-up question complexity by leveraging two 2 types of structured questions, which either validate a fact or inquire about a fact. RAV generalizes across both domains and label granularities, outperforming state-of-the-art methods by 57.5% on PFO (political, 5-class) and by 3.05% on the widely used HOVER dataset (encyclopedic, 2-class).</abstract>
<identifier type="citekey">shukla-etal-2025-recon</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-industry.167/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>2429</start>
<end>2448</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Recon, Answer, Verify: Agents in Search of Truth
%A Shukla, Satyam
%A Dutta, Himanshu
%A Bhattacharyya, Pushpak
%Y Potdar, Saloni
%Y Rojas-Barahona, Lina
%Y Montella, Sebastien
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou (China)
%@ 979-8-89176-333-3
%F shukla-etal-2025-recon
%X Human fact-checking is too slow to meet current demands, making automatic fact-checking systems an essential alternative. Evaluating such systems is challenging, as existing benchmark datasets suffer from either leakage or evidence incompleteness. This limits the realism of current evaluations. We present Politi-Fact-Only (PFO), a 5-class benchmark dataset of 2,982 political claims from politifact.com, where all post-claim analysis and annotator cues have been manually removed from the evidence articles. After filtering, the evidence contains only information available prior to the claim’s verification. On PFO, we observe an average performance drop of 11.39% in macro-F1 compared to PFO’s unfiltered version. Based on the challenges identified in existing LLM-based fact-checking systems, we propose RAV (Recon-Answer-Verify), an agentic framework with three agents that iteratively generates and answers sub-questions to verify different aspects of the claim before finally generating the label. Unlike prior work, we reduce follow-up question complexity by leveraging two types of structured questions, which either validate a fact or inquire about a fact. RAV generalizes across both domains and label granularities, outperforming state-of-the-art methods by 57.5% on PFO (political, 5-class) and by 3.05% on the widely used HOVER dataset (encyclopedic, 2-class).
%U https://aclanthology.org/2025.emnlp-industry.167/
%P 2429-2448
Markdown (Informal)
[Recon, Answer, Verify: Agents in Search of Truth](https://aclanthology.org/2025.emnlp-industry.167/) (Shukla et al., EMNLP 2025)

ACL
Satyam Shukla, Himanshu Dutta, and Pushpak Bhattacharyya. 2025. Recon, Answer, Verify: Agents in Search of Truth. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track, pages 2429–2448, Suzhou (China). Association for Computational Linguistics.