@inproceedings{li-etal-2025-support,
title = "On Support Samples of Next Word Prediction",
author = "Li, Yuqian and
Du, Yupei and
Liu, Yufang and
Feng, Feifei and
Feng, Mou Xiao and
Wu, Yuanbin",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.507/",
doi = "10.18653/v1/2025.acl-long.507",
pages = "10277--10289",
ISBN = "979-8-89176-251-0",
abstract = "Language models excel in various tasks by making complex decisions, yet understanding the rationale behind these decisions remains a challenge. This paper investigates \textit{data-centric interpretability} in language models, focusing on the next-word prediction task. Using representer theorem, we identify two types of \textit{support samples}{---}those that either promote or deter specific predictions. Our findings reveal that being a support sample is an intrinsic property, predictable even before training begins. Additionally, while non-support samples are less influential in direct predictions, they play a critical role in preventing overfitting and shaping generalization and representation learning. Notably, the importance of non-support samples increases in deeper layers, suggesting their significant role in intermediate representation formation.These insights shed light on the interplay between data and model decisions, offering a new dimension to understanding language model behavior and interpretability."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2025-support">
<titleInfo>
<title>On Support Samples of Next Word Prediction</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yuqian</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yupei</namePart>
<namePart type="family">Du</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yufang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Feifei</namePart>
<namePart type="family">Feng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mou</namePart>
<namePart type="given">Xiao</namePart>
<namePart type="family">Feng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuanbin</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>Language models excel in various tasks by making complex decisions, yet understanding the rationale behind these decisions remains a challenge. This paper investigates data-centric interpretability in language models, focusing on the next-word prediction task. Using the representer theorem, we identify two types of support samples—those that either promote or deter specific predictions. Our findings reveal that being a support sample is an intrinsic property, predictable even before training begins. Additionally, while non-support samples are less influential in direct predictions, they play a critical role in preventing overfitting and shaping generalization and representation learning. Notably, the importance of non-support samples increases in deeper layers, suggesting their significant role in intermediate representation formation. These insights shed light on the interplay between data and model decisions, offering a new dimension to understanding language model behavior and interpretability.</abstract>
<identifier type="citekey">li-etal-2025-support</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.507</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.507/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>10277</start>
<end>10289</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On Support Samples of Next Word Prediction
%A Li, Yuqian
%A Du, Yupei
%A Liu, Yufang
%A Feng, Feifei
%A Feng, Mou Xiao
%A Wu, Yuanbin
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F li-etal-2025-support
%X Language models excel in various tasks by making complex decisions, yet understanding the rationale behind these decisions remains a challenge. This paper investigates data-centric interpretability in language models, focusing on the next-word prediction task. Using the representer theorem, we identify two types of support samples—those that either promote or deter specific predictions. Our findings reveal that being a support sample is an intrinsic property, predictable even before training begins. Additionally, while non-support samples are less influential in direct predictions, they play a critical role in preventing overfitting and shaping generalization and representation learning. Notably, the importance of non-support samples increases in deeper layers, suggesting their significant role in intermediate representation formation. These insights shed light on the interplay between data and model decisions, offering a new dimension to understanding language model behavior and interpretability.
%R 10.18653/v1/2025.acl-long.507
%U https://aclanthology.org/2025.acl-long.507/
%U https://doi.org/10.18653/v1/2025.acl-long.507
%P 10277-10289
Markdown (Informal)
[On Support Samples of Next Word Prediction](https://aclanthology.org/2025.acl-long.507/) (Li et al., ACL 2025)
ACL
- Yuqian Li, Yupei Du, Yufang Liu, Feifei Feng, Mou Xiao Feng, and Yuanbin Wu. 2025. On Support Samples of Next Word Prediction. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 10277–10289, Vienna, Austria. Association for Computational Linguistics.
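
The support samples named in the abstract come from a representer-theorem decomposition of the model's predictions. The sketch below is a generic illustration of that decomposition, not the paper's exact formulation: the symbols f_w (the model's logit for a candidate next word given context x), k (a similarity between a training context and the test context), and alpha_i (the coefficient attached to training sample i) are notation assumed here for exposition.

```latex
% Illustrative sketch only; notation is assumed, not taken from the paper.
% f_w(x)    : logit assigned to a candidate next word for test context x
% k(x_i, x) : similarity between training context x_i and test context x
% alpha_i   : representer coefficient of training sample i
\[
  f_w(x) \;=\; \sum_{i=1}^{n} \alpha_i \, k(x_i, x)
\]
```

Under this reading, samples with nonzero alpha_i are support samples (positive coefficients promote the prediction, negative ones deter it), while alpha_i = 0 corresponds to a non-support sample; consult the paper itself for the precise definition used in the next-word prediction setting.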