@inproceedings{upadhyay-etal-2026-evidence,
title = "Evidence Grounding vs. Memorization: Why Neural Semantics Matter for Knowledge Graph Fact Verification",
author = "Upadhyay, Ankit Kumar and
Erickson, John S. and
McGuinness, Deborah L.",
editor = "Akhtar, Mubashara and
Aly, Rami and
Cao, Rui and
Christodoulopoulos, Christos and
Cocarascu, Oana and
Guo, Zhijiang and
Mittal, Arpit and
Schlichtkrull, Michael and
Thorne, James and
Vlachos, Andreas",
booktitle = "Proceedings of the Ninth Fact Extraction and {VER}ification Workshop ({FEVER})",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.fever-1.3/",
pages = "28--46",
    isbn = "979-8-89176-365-4",
abstract = "Knowledge graphs like DBpedia enable structured fact verification, but the relative contributions of symbolic structure, neural semantics, and evidence grounding remain unclear. We present a systematic study on FACTKG (108,675 claims) comparing symbolic, neural, and LLM-based approaches. Our symbolic baseline using 29 hand-crafted features covering graph structure, entity coverage, and semantic relation type achieves 66.54{\%} accuracy, while BERT over linearized subgraphs reaches 92.68{\%} and graph neural networks plateau at 70{\%}, demonstrating that token-level semantics outperform both symbolic features and message passing. Using GPT-4.1-mini to filter training data, budget-matched controls show that token-budget control recovers most of the gap over truncation-dominated inputs, while LLM semantic selection adds +1.31 points beyond lexical heuristics (78.85{\%} filtered vs. 77.54{\%} heuristic vs. 52.70{\%} unfiltered), showing that semantic relevance, not just evidence quantity, governs learnability. Finally, comparing 300 test claims under memorization (claim-only) versus KG-grounded reasoning with chain-of-thought, we find KG grounding improves GPT-4o-mini and GPT-4.1-mini accuracy by 12.67 and 9.33 points respectively, with models citing specific triples for interpretability. These results demonstrate that neural semantic representations and explicit KG evidence grounding are highly effective for robust, interpretable fact verification."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="upadhyay-etal-2026-evidence">
<titleInfo>
<title>Evidence Grounding vs. Memorization: Why Neural Semantics Matter for Knowledge Graph Fact Verification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ankit</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Upadhyay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="given">S</namePart>
<namePart type="family">Erickson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deborah</namePart>
<namePart type="given">L</namePart>
<namePart type="family">McGuinness</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth Fact Extraction and VERification Workshop (FEVER)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mubashara</namePart>
<namePart type="family">Akhtar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rami</namePart>
<namePart type="family">Aly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rui</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oana</namePart>
<namePart type="family">Cocarascu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhijiang</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arpit</namePart>
<namePart type="family">Mittal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Schlichtkrull</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Thorne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Vlachos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-365-4</identifier>
</relatedItem>
<abstract>Knowledge graphs like DBpedia enable structured fact verification, but the relative contributions of symbolic structure, neural semantics, and evidence grounding remain unclear. We present a systematic study on FACTKG (108,675 claims) comparing symbolic, neural, and LLM-based approaches. Our symbolic baseline using 29 hand-crafted features covering graph structure, entity coverage, and semantic relation type achieves 66.54% accuracy, while BERT over linearized subgraphs reaches 92.68% and graph neural networks plateau at 70%, demonstrating that token-level semantics outperform both symbolic features and message passing. Using GPT-4.1-mini to filter training data, budget-matched controls show that token-budget control recovers most of the gap over truncation-dominated inputs, while LLM semantic selection adds +1.31 points beyond lexical heuristics (78.85% filtered vs. 77.54% heuristic vs. 52.70% unfiltered), showing that semantic relevance, not just evidence quantity, governs learnability. Finally, comparing 300 test claims under memorization (claim-only) versus KG-grounded reasoning with chain-of-thought, we find KG grounding improves GPT-4o-mini and GPT-4.1-mini accuracy by 12.67 and 9.33 points respectively, with models citing specific triples for interpretability. These results demonstrate that neural semantic representations and explicit KG evidence grounding are highly effective for robust, interpretable fact verification.</abstract>
<identifier type="citekey">upadhyay-etal-2026-evidence</identifier>
<location>
<url>https://aclanthology.org/2026.fever-1.3/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>28</start>
<end>46</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evidence Grounding vs. Memorization: Why Neural Semantics Matter for Knowledge Graph Fact Verification
%A Upadhyay, Ankit Kumar
%A Erickson, John S.
%A McGuinness, Deborah L.
%Y Akhtar, Mubashara
%Y Aly, Rami
%Y Cao, Rui
%Y Christodoulopoulos, Christos
%Y Cocarascu, Oana
%Y Guo, Zhijiang
%Y Mittal, Arpit
%Y Schlichtkrull, Michael
%Y Thorne, James
%Y Vlachos, Andreas
%S Proceedings of the Ninth Fact Extraction and VERification Workshop (FEVER)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-365-4
%F upadhyay-etal-2026-evidence
%X Knowledge graphs like DBpedia enable structured fact verification, but the relative contributions of symbolic structure, neural semantics, and evidence grounding remain unclear. We present a systematic study on FACTKG (108,675 claims) comparing symbolic, neural, and LLM-based approaches. Our symbolic baseline using 29 hand-crafted features covering graph structure, entity coverage, and semantic relation type achieves 66.54% accuracy, while BERT over linearized subgraphs reaches 92.68% and graph neural networks plateau at 70%, demonstrating that token-level semantics outperform both symbolic features and message passing. Using GPT-4.1-mini to filter training data, budget-matched controls show that token-budget control recovers most of the gap over truncation-dominated inputs, while LLM semantic selection adds +1.31 points beyond lexical heuristics (78.85% filtered vs. 77.54% heuristic vs. 52.70% unfiltered), showing that semantic relevance, not just evidence quantity, governs learnability. Finally, comparing 300 test claims under memorization (claim-only) versus KG-grounded reasoning with chain-of-thought, we find KG grounding improves GPT-4o-mini and GPT-4.1-mini accuracy by 12.67 and 9.33 points respectively, with models citing specific triples for interpretability. These results demonstrate that neural semantic representations and explicit KG evidence grounding are highly effective for robust, interpretable fact verification.
%U https://aclanthology.org/2026.fever-1.3/
%P 28-46
Markdown (Informal)
[Evidence Grounding vs. Memorization: Why Neural Semantics Matter for Knowledge Graph Fact Verification](https://aclanthology.org/2026.fever-1.3/) (Upadhyay et al., FEVER 2026)
ACL