@inproceedings{bader-etal-2025-covegat-hybrid,
title = "{CoVeGAT}: A Hybrid {LLM} {\&} {Graph-Attention} Pipeline for Accurate {Citation-Aligned} Claim Verification",
author = "Bader, Max and
Arunkumar, Akshatha and
Ahmad, Ohan and
Hassen, Maruf and
Duong, Charles and
Sharma, Vasu and
O{'}Brien, Sean and
Zhu, Kevin",
editor = "Picazo-Izquierdo, Alicia and
Estevanell-Valladares, Ernesto Luis and
Mitkov, Ruslan and
Guillena, Rafael Mu{\~n}oz and
Cerd{\'a}, Ra{\'u}l Garc{\'i}a",
booktitle = "Proceedings of the First Workshop on Comparative Performance Evaluation: From Rules to Language Models",
month = sep,
year = "2025",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2025.r2lm-1.1/",
pages = "1--9",
abstract = "Modern LLMs often generate fluent text yet fabricate, misquote, or misattribute evidence. To quantify this flaw, we built a balanced Citation-Alignment Dataset of 500 genuine, expert-verified claim--quote pairs and 500 minimally perturbed false variants from news, legal, scientific, and literary sources. We then propose CoVeGAT, which converts claims and citations into SVO triplets (with trigram fallback), scores each pair via an LLM-driven chain of verification, and embeds them in a weighted semantic graph. A Graph Attention Network over BERT embeddings issues strict pass/fail judgments on alignment. Zero-shot evaluation of seven top LLMs (e.g., GPT-4o, Gemini 1.5, Mistral 7B) reveals a trade-off: decisive models reach 82.5 \% accuracy but err confidently, while cautious ones fall below 50 \%. A MiniLM + RBF kernel baseline, by contrast, achieves 96.4 \% accuracy, underscoring the power of simple, interpretable methods."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bader-etal-2025-covegat-hybrid">
<titleInfo>
<title>CoVeGAT: A Hybrid LLM &amp; Graph-Attention Pipeline for Accurate Citation-Aligned Claim Verification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Max</namePart>
<namePart type="family">Bader</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Akshatha</namePart>
<namePart type="family">Arunkumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ohan</namePart>
<namePart type="family">Ahmad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maruf</namePart>
<namePart type="family">Hassen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Charles</namePart>
<namePart type="family">Duong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vasu</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sean</namePart>
<namePart type="family">O’Brien</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Comparative Performance Evaluation: From Rules to Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alicia</namePart>
<namePart type="family">Picazo-Izquierdo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ernesto</namePart>
<namePart type="given">Luis</namePart>
<namePart type="family">Estevanell-Valladares</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rafael</namePart>
<namePart type="given">Muñoz</namePart>
<namePart type="family">Guillena</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raúl</namePart>
<namePart type="given">García</namePart>
<namePart type="family">Cerdá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Modern LLMs often generate fluent text yet fabricate, misquote, or misattribute evidence. To quantify this flaw, we built a balanced Citation-Alignment Dataset of 500 genuine, expert-verified claim–quote pairs and 500 minimally perturbed false variants from news, legal, scientific, and literary sources. We then propose CoVeGAT, which converts claims and citations into SVO triplets (with trigram fallback), scores each pair via an LLM-driven chain of verification, and embeds them in a weighted semantic graph. A Graph Attention Network over BERT embeddings issues strict pass/fail judgments on alignment. Zero-shot evaluation of seven top LLMs (e.g., GPT-4o, Gemini 1.5, Mistral 7B) reveals a trade-off: decisive models reach 82.5 % accuracy but err confidently, while cautious ones fall below 50 %. A MiniLM + RBF kernel baseline, by contrast, achieves 96.4 % accuracy, underscoring the power of simple, interpretable methods.</abstract>
<identifier type="citekey">bader-etal-2025-covegat-hybrid</identifier>
<location>
<url>https://aclanthology.org/2025.r2lm-1.1/</url>
</location>
<part>
<date>2025-09</date>
<extent unit="page">
<start>1</start>
<end>9</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CoVeGAT: A Hybrid LLM & Graph-Attention Pipeline for Accurate Citation-Aligned Claim Verification
%A Bader, Max
%A Arunkumar, Akshatha
%A Ahmad, Ohan
%A Hassen, Maruf
%A Duong, Charles
%A Sharma, Vasu
%A O’Brien, Sean
%A Zhu, Kevin
%Y Picazo-Izquierdo, Alicia
%Y Estevanell-Valladares, Ernesto Luis
%Y Mitkov, Ruslan
%Y Guillena, Rafael Muñoz
%Y Cerdá, Raúl García
%S Proceedings of the First Workshop on Comparative Performance Evaluation: From Rules to Language Models
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F bader-etal-2025-covegat-hybrid
%X Modern LLMs often generate fluent text yet fabricate, misquote, or misattribute evidence. To quantify this flaw, we built a balanced Citation-Alignment Dataset of 500 genuine, expert-verified claim–quote pairs and 500 minimally perturbed false variants from news, legal, scientific, and literary sources. We then propose CoVeGAT, which converts claims and citations into SVO triplets (with trigram fallback), scores each pair via an LLM-driven chain of verification, and embeds them in a weighted semantic graph. A Graph Attention Network over BERT embeddings issues strict pass/fail judgments on alignment. Zero-shot evaluation of seven top LLMs (e.g., GPT-4o, Gemini 1.5, Mistral 7B) reveals a trade-off: decisive models reach 82.5 % accuracy but err confidently, while cautious ones fall below 50 %. A MiniLM + RBF kernel baseline, by contrast, achieves 96.4 % accuracy, underscoring the power of simple, interpretable methods.
%U https://aclanthology.org/2025.r2lm-1.1/
%P 1-9
Markdown (Informal)
[CoVeGAT: A Hybrid LLM & Graph-Attention Pipeline for Accurate Citation-Aligned Claim Verification](https://aclanthology.org/2025.r2lm-1.1/) (Bader et al., R2LM 2025)
ACL
- Max Bader, Akshatha Arunkumar, Ohan Ahmad, Maruf Hassen, Charles Duong, Vasu Sharma, Sean O’Brien, and Kevin Zhu. 2025. CoVeGAT: A Hybrid LLM & Graph-Attention Pipeline for Accurate Citation-Aligned Claim Verification. In Proceedings of the First Workshop on Comparative Performance Evaluation: From Rules to Language Models, pages 1–9, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.