% BibTeX record for ACL Anthology entry 2026.acl-srw.30 (abstract whitespace/hyphenation repaired).
@inproceedings{singh-2026-debiasing,
title = "Debiasing Logical Fallacy Detection for Real-World Robustness via Counterfactually Augmented Data",
author = "Singh, Navyansh",
editor = "T.Y.S.S., Santosh and
Rodriguez, Juan Diego and
de Gibert, Ona",
booktitle = "Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics ({ACL} 2026)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.acl-srw.30/",
pages = "363--374",
ISBN = "979-8-89176-393-7",
abstract = "Logical fallacy detection models frequently over-flag valid reasoning due to reliance on surface-level spurious correlations. We introduce 703 LLM-generated Counterfactually Augmented Data (CAD) pairs{---}minimally differentiated valid and fallacious arguments{---}to debias models through targeted augmentation. Fine-tuning DeBERTa-v3-large on CoCoLoFa augmented with these pairs yields marginal in-distribution improvement (+0.4{\%} F1) but substantial out-of-distribution robustness: 58{\%} relative reduction in false positive rate (64{\%} {\textrightarrow} 26.7{\%}) on a 300-sample Reddit-sourced evaluation set. While recent LLMs (Llama-3.1-8B, Llama-3.3-70B) achieve high performance under optimized prompts (F1 90{--}94{\%}), they degrade severely under simple human-like prompts (F1 63{--}72{\%}, FPR 54{--}74{\%}). Our lightweight, prompt-invariant approach achieves competitive robustness (F1 85.9{\%}, FPR 26.7{\%}) across all prompting regimes without prompt engineering, making it stable for production deployment with unpredictable user input. The dataset and model are publicly released."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="singh-2026-debiasing">
<titleInfo>
<title>Debiasing Logical Fallacy Detection for Real-World Robustness via Counterfactually Augmented Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Navyansh</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Santosh</namePart>
<namePart type="family">T.Y.S.S.</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="given">Diego</namePart>
<namePart type="family">Rodriguez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ona</namePart>
<namePart type="family">de Gibert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-393-7</identifier>
</relatedItem>
<abstract>Logical fallacy detection models frequently over-flag valid reasoning due to reliance on surface-level spurious correlations. We introduce 703 LLM-generated Counterfactually Augmented Data (CAD) pairs—minimally differentiated valid and fallacious arguments—to debias models through targeted augmentation. Fine-tuning DeBERTa-v3-large on CoCoLoFa augmented with these pairs yields marginal in-distribution improvement (+0.4% F1) but substantial out-of-distribution robustness: 58% relative reduction in false positive rate (64% → 26.7%) on a 300-sample Reddit-sourced evaluation set. While recent LLMs (Llama-3.1-8B, Llama-3.3-70B) achieve high performance under optimized prompts (F1 90–94%), they degrade severely under simple human-like prompts (F1 63–72%, FPR 54–74%). Our lightweight, prompt-invariant approach achieves competitive robustness (F1 85.9%, FPR 26.7%) across all prompting regimes without prompt engineering, making it stable for production deployment with unpredictable user input. The dataset and model are publicly released.</abstract>
<identifier type="citekey">singh-2026-debiasing</identifier>
<location>
<url>https://aclanthology.org/2026.acl-srw.30/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>363</start>
<end>374</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Debiasing Logical Fallacy Detection for Real-World Robustness via Counterfactually Augmented Data
%A Singh, Navyansh
%Y T.Y.S.S., Santosh
%Y Rodriguez, Juan Diego
%Y de Gibert, Ona
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-393-7
%F singh-2026-debiasing
%X Logical fallacy detection models frequently over-flag valid reasoning due to reliance on surface-level spurious correlations. We introduce 703 LLM-generated Counterfactually Augmented Data (CAD) pairs—minimally differentiated valid and fallacious arguments—to debias models through targeted augmentation. Fine-tuning DeBERTa-v3-large on CoCoLoFa augmented with these pairs yields marginal in-distribution improvement (+0.4% F1) but substantial out-of-distribution robustness: 58% relative reduction in false positive rate (64% → 26.7%) on a 300-sample Reddit-sourced evaluation set. While recent LLMs (Llama-3.1-8B, Llama-3.3-70B) achieve high performance under optimized prompts (F1 90–94%), they degrade severely under simple human-like prompts (F1 63–72%, FPR 54–74%). Our lightweight, prompt-invariant approach achieves competitive robustness (F1 85.9%, FPR 26.7%) across all prompting regimes without prompt engineering, making it stable for production deployment with unpredictable user input. The dataset and model are publicly released.
%U https://aclanthology.org/2026.acl-srw.30/
%P 363-374
Markdown (Informal)
[Debiasing Logical Fallacy Detection for Real-World Robustness via Counterfactually Augmented Data](https://aclanthology.org/2026.acl-srw.30/) (Singh, ACL 2026)
ACL