@inproceedings{xiao-etal-2025-tokenshapley,
title = "{T}oken{S}hapley: Token Level Context Attribution with Shapley Value",
author = "Xiao, Yingtai and
Zhu, Yuqing and
Samyoun, Sirat and
Zhang, Wanrong and
Wang, Jiachen T. and
Du, Jian",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.200/",
doi = "10.18653/v1/2025.findings-acl.200",
pages = "3882--3894",
ISBN = "979-8-89176-256-5",
abstract = "Large language models (LLMs) demonstrate strong capabilities in in-context learning, but verifying the correctness of their generated responses remains a challenge. Prior work has explored attribution at the sentence level, but these methods fall short when users seek attribution for specific keywords within the response, such as numbers, years, or names. To address this limitation, we propose TokenShapley, a novel token-level attribution method that combines Shapley value-based data attribution with KNN-based retrieval techniques inspired by recent advances in KNN-augmented LLMs. By leveraging a precomputed datastore for contextual retrieval and computing Shapley values to quantify token importance, TokenShapley provides a fine-grained data attribution approach. Extensive evaluations on four benchmarks show that TokenShapley outperforms state-of-the-art baselines in token-level attribution, achieving a 11{--}23{\%} improvement in accuracy."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xiao-etal-2025-tokenshapley">
<titleInfo>
<title>TokenShapley: Token Level Context Attribution with Shapley Value</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yingtai</namePart>
<namePart type="family">Xiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuqing</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sirat</namePart>
<namePart type="family">Samyoun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wanrong</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiachen</namePart>
<namePart type="given">T</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jian</namePart>
<namePart type="family">Du</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Large language models (LLMs) demonstrate strong capabilities in in-context learning, but verifying the correctness of their generated responses remains a challenge. Prior work has explored attribution at the sentence level, but these methods fall short when users seek attribution for specific keywords within the response, such as numbers, years, or names. To address this limitation, we propose TokenShapley, a novel token-level attribution method that combines Shapley value-based data attribution with KNN-based retrieval techniques inspired by recent advances in KNN-augmented LLMs. By leveraging a precomputed datastore for contextual retrieval and computing Shapley values to quantify token importance, TokenShapley provides a fine-grained data attribution approach. Extensive evaluations on four benchmarks show that TokenShapley outperforms state-of-the-art baselines in token-level attribution, achieving a 11–23% improvement in accuracy.</abstract>
<identifier type="citekey">xiao-etal-2025-tokenshapley</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.200</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.200/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>3882</start>
<end>3894</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TokenShapley: Token Level Context Attribution with Shapley Value
%A Xiao, Yingtai
%A Zhu, Yuqing
%A Samyoun, Sirat
%A Zhang, Wanrong
%A Wang, Jiachen T.
%A Du, Jian
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F xiao-etal-2025-tokenshapley
%X Large language models (LLMs) demonstrate strong capabilities in in-context learning, but verifying the correctness of their generated responses remains a challenge. Prior work has explored attribution at the sentence level, but these methods fall short when users seek attribution for specific keywords within the response, such as numbers, years, or names. To address this limitation, we propose TokenShapley, a novel token-level attribution method that combines Shapley value-based data attribution with KNN-based retrieval techniques inspired by recent advances in KNN-augmented LLMs. By leveraging a precomputed datastore for contextual retrieval and computing Shapley values to quantify token importance, TokenShapley provides a fine-grained data attribution approach. Extensive evaluations on four benchmarks show that TokenShapley outperforms state-of-the-art baselines in token-level attribution, achieving a 11–23% improvement in accuracy.
%R 10.18653/v1/2025.findings-acl.200
%U https://aclanthology.org/2025.findings-acl.200/
%U https://doi.org/10.18653/v1/2025.findings-acl.200
%P 3882-3894
Markdown (Informal)
[TokenShapley: Token Level Context Attribution with Shapley Value](https://aclanthology.org/2025.findings-acl.200/) (Xiao et al., Findings 2025)
ACL