@inproceedings{walden-etal-2026-grounded,
title = "How Grounded is {W}ikipedia? A Study on Structured Evidential Support and Retrieval",
author = "Walden, William Gantt and
Ricci, Kathryn and
Wanner, Miriam and
Jiang, Zhengping and
May, Chandler and
Zhou, Rongkun and
Van Durme, Benjamin",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.19/",
pages = "400--420",
ISBN = "979-8-89176-395-1",
abstract = "Wikipedia is a critical resource for modern NLP, serving as a rich repository of up-to-date and citation-backed information on a wide variety of subjects. The reliability of Wikipedia{---}its groundedness in its cited sources{---}is vital to this purpose. This work analyzes both how grounded Wikipedia is and how readily fine-grained grounding evidence can be retrieved. To this end, we introduce PeopleProfiles{---}a large-scale, multi-level dataset of claim support annotations on biographical Wikipedia articles. We show that: {\textasciitilde}22{\%} of claims in Wikipedia *lead* sections are unsupported by the article body; {\textasciitilde}30{\%} of annotated claims in the article *body* are unsupported by their (publicly accessible) sources; and real-world Wikipedia citation practices often differ from documented standards. Finally, we show that complex evidence retrieval remains a challenge{---}even for recent reasoning rerankers."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="walden-etal-2026-grounded">
<titleInfo>
<title>How Grounded is Wikipedia? A Study on Structured Evidential Support and Retrieval</title>
</titleInfo>
<name type="personal">
<namePart type="given">William</namePart>
<namePart type="given">Gantt</namePart>
<namePart type="family">Walden</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kathryn</namePart>
<namePart type="family">Ricci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miriam</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhengping</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chandler</namePart>
<namePart type="family">May</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rongkun</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benjamin</namePart>
<namePart type="family">Van Durme</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>Wikipedia is a critical resource for modern NLP, serving as a rich repository of up-to-date and citation-backed information on a wide variety of subjects. The reliability of Wikipedia—its groundedness in its cited sources—is vital to this purpose. This work analyzes both how grounded Wikipedia is and how readily fine-grained grounding evidence can be retrieved. To this end, we introduce PeopleProfiles—a large-scale, multi-level dataset of claim support annotations on biographical Wikipedia articles. We show that: ~22% of claims in Wikipedia *lead* sections are unsupported by the article body; ~30% of annotated claims in the article *body* are unsupported by their (publicly accessible) sources; and real-world Wikipedia citation practices often differ from documented standards. Finally, we show that complex evidence retrieval remains a challenge—even for recent reasoning rerankers.</abstract>
<identifier type="citekey">walden-etal-2026-grounded</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.19/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>400</start>
<end>420</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T How Grounded is Wikipedia? A Study on Structured Evidential Support and Retrieval
%A Walden, William Gantt
%A Ricci, Kathryn
%A Wanner, Miriam
%A Jiang, Zhengping
%A May, Chandler
%A Zhou, Rongkun
%A Van Durme, Benjamin
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F walden-etal-2026-grounded
%X Wikipedia is a critical resource for modern NLP, serving as a rich repository of up-to-date and citation-backed information on a wide variety of subjects. The reliability of Wikipedia—its groundedness in its cited sources—is vital to this purpose. This work analyzes both how grounded Wikipedia is and how readily fine-grained grounding evidence can be retrieved. To this end, we introduce PeopleProfiles—a large-scale, multi-level dataset of claim support annotations on biographical Wikipedia articles. We show that: ~22% of claims in Wikipedia *lead* sections are unsupported by the article body; ~30% of annotated claims in the article *body* are unsupported by their (publicly accessible) sources; and real-world Wikipedia citation practices often differ from documented standards. Finally, we show that complex evidence retrieval remains a challenge—even for recent reasoning rerankers.
%U https://aclanthology.org/2026.findings-acl.19/
%P 400-420
Markdown (Informal)
[How Grounded is Wikipedia? A Study on Structured Evidential Support and Retrieval](https://aclanthology.org/2026.findings-acl.19/) (Walden et al., Findings 2026)
ACL
- William Gantt Walden, Kathryn Ricci, Miriam Wanner, Zhengping Jiang, Chandler May, Rongkun Zhou, and Benjamin Van Durme. 2026. How Grounded is Wikipedia? A Study on Structured Evidential Support and Retrieval. In Findings of the Association for Computational Linguistics: ACL 2026, pages 400–420, San Diego, California, United States. Association for Computational Linguistics.