@inproceedings{saritas-yildiz-2025-reproduction,
title = "A Reproduction Study: The Kernel {PCA} Interpretation of Self-Attention Fails Under Scrutiny",
author = "Sar{\i}ta{\c{s}}, Karahan and
Y{\i}ld{\i}z, {\c{C}}a{\u{g}}atay",
editor = "Zhao, Jin and
Wang, Mingyang and
Liu, Zhu",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-srw.11/",
doi = "10.18653/v1/2025.acl-srw.11",
pages = "173--185",
ISBN = "979-8-89176-254-1",
abstract = "In this reproduction study, we revisit recent claims that self-attention implements kernel principal component analysis (KPCA) (Teo and Nguyen, 2024), positing that (i) value vectors $V$ capture the eigenvectors of the Gram matrix of the keys, and (ii) that self-attention projects queries onto the principal component axes of the key matrix $K$ in a feature space. Our analysis reveals three critical inconsistencies: (1) No alignment exists between learned self-attention value vectors and what is proposed in the KPCA perspective, with average similarity metrics (optimal cosine similarity $\leq 0.32$, linear CKA (Centered Kernel Alignment) $\leq 0.11$, kernel CKA $\leq 0.32$) indicating negligible correspondence; (2) Reported decreases in reconstruction loss $J_\text{proj}$, arguably justifying the claim that the self-attentionminimizes the projection error of KPCA, are misinterpreted, as the quantities involved differ by orders of magnitude ($\sim 10^3$); (3) Gram matrix eigenvalue statistics, introduced to justify that $V$ captures the eigenvector of the gram matrix, are irreproducible without undocumented implementation-specific adjustments. Across 10 transformer architectures, we conclude that the KPCA interpretation of self-attention lacks empirical support."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="saritas-yildiz-2025-reproduction">
<titleInfo>
<title>A Reproduction Study: The Kernel PCA Interpretation of Self-Attention Fails Under Scrutiny</title>
</titleInfo>
<name type="personal">
<namePart type="given">Karahan</namePart>
<namePart type="family">Sarıtaş</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Çağatay</namePart>
<namePart type="family">Yıldız</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jin</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mingyang</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhu</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-254-1</identifier>
</relatedItem>
<abstract>In this reproduction study, we revisit recent claims that self-attention implements kernel principal component analysis (KPCA) (Teo and Nguyen, 2024), positing that (i) value vectors V capture the eigenvectors of the Gram matrix of the keys, and (ii) self-attention projects queries onto the principal component axes of the key matrix K in a feature space. Our analysis reveals three critical inconsistencies: (1) No alignment exists between learned self-attention value vectors and what is proposed in the KPCA perspective, with average similarity metrics (optimal cosine similarity ≤ 0.32, linear CKA (Centered Kernel Alignment) ≤ 0.11, kernel CKA ≤ 0.32) indicating negligible correspondence; (2) Reported decreases in reconstruction loss J_proj, arguably justifying the claim that self-attention minimizes the projection error of KPCA, are misinterpreted, as the quantities involved differ by orders of magnitude (~10³); (3) Gram matrix eigenvalue statistics, introduced to justify that V captures the eigenvectors of the Gram matrix, are irreproducible without undocumented implementation-specific adjustments. Across 10 transformer architectures, we conclude that the KPCA interpretation of self-attention lacks empirical support.</abstract>
<identifier type="citekey">saritas-yildiz-2025-reproduction</identifier>
<identifier type="doi">10.18653/v1/2025.acl-srw.11</identifier>
<location>
<url>https://aclanthology.org/2025.acl-srw.11/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>173</start>
<end>185</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Reproduction Study: The Kernel PCA Interpretation of Self-Attention Fails Under Scrutiny
%A Sarıtaş, Karahan
%A Yıldız, Çağatay
%Y Zhao, Jin
%Y Wang, Mingyang
%Y Liu, Zhu
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-254-1
%F saritas-yildiz-2025-reproduction
%X In this reproduction study, we revisit recent claims that self-attention implements kernel principal component analysis (KPCA) (Teo and Nguyen, 2024), positing that (i) value vectors V capture the eigenvectors of the Gram matrix of the keys, and (ii) self-attention projects queries onto the principal component axes of the key matrix K in a feature space. Our analysis reveals three critical inconsistencies: (1) No alignment exists between learned self-attention value vectors and what is proposed in the KPCA perspective, with average similarity metrics (optimal cosine similarity ≤ 0.32, linear CKA (Centered Kernel Alignment) ≤ 0.11, kernel CKA ≤ 0.32) indicating negligible correspondence; (2) Reported decreases in reconstruction loss J_proj, arguably justifying the claim that self-attention minimizes the projection error of KPCA, are misinterpreted, as the quantities involved differ by orders of magnitude (~10³); (3) Gram matrix eigenvalue statistics, introduced to justify that V captures the eigenvectors of the Gram matrix, are irreproducible without undocumented implementation-specific adjustments. Across 10 transformer architectures, we conclude that the KPCA interpretation of self-attention lacks empirical support.
%R 10.18653/v1/2025.acl-srw.11
%U https://aclanthology.org/2025.acl-srw.11/
%U https://doi.org/10.18653/v1/2025.acl-srw.11
%P 173-185
Markdown (Informal)
[A Reproduction Study: The Kernel PCA Interpretation of Self-Attention Fails Under Scrutiny](https://aclanthology.org/2025.acl-srw.11/) (Sarıtaş & Yıldız, ACL 2025)
ACL
Karahan Sarıtaş and Çağatay Yıldız. 2025. A Reproduction Study: The Kernel PCA Interpretation of Self-Attention Fails Under Scrutiny. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop), pages 173–185, Vienna, Austria. Association for Computational Linguistics.
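
For readers who want to probe the abstract's alignment claim themselves, below is a minimal sketch of linear CKA (Centered Kernel Alignment), one of the similarity metrics the paper reports (linear CKA ≤ 0.11 between learned value vectors and their KPCA-predicted counterparts). This is an illustrative implementation of the standard metric (Kornblith et al., 2019), not the authors' released code; the matrix names and shapes are hypothetical.

```python
import numpy as np

def linear_cka(X: np.ndarray, Y: np.ndarray) -> float:
    """Linear CKA between two representation matrices of shape (n_samples, dim)."""
    # Center each feature dimension.
    X = X - X.mean(axis=0, keepdims=True)
    Y = Y - Y.mean(axis=0, keepdims=True)
    # ||Y^T X||_F^2 / (||X^T X||_F * ||Y^T Y||_F)
    cross = np.linalg.norm(Y.T @ X, "fro") ** 2
    norm_x = np.linalg.norm(X.T @ X, "fro")
    norm_y = np.linalg.norm(Y.T @ Y, "fro")
    return cross / (norm_x * norm_y)

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    V_learned = rng.standard_normal((197, 64))  # hypothetical value vectors of one head
    V_kpca = rng.standard_normal((197, 64))     # hypothetical KPCA-derived counterpart
    # Unrelated random matrices give a CKA near 0; identical matrices give 1.
    print(f"linear CKA: {linear_cka(V_learned, V_kpca):.3f}")
```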