@inproceedings{cho-etal-2025-versatility,
title = "On the Versatility of Sparse Autoencoders for In-Context Learning",
author = "Cho, Ikhyun and
Kwon, Gaeul and
Hockenmaier, Julia",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.1063/",
pages = "19531--19538",
ISBN = "979-8-89176-335-7",
abstract = "Sparse autoencoders (SAEs) are emerging as a key analytical tool in the field of mechanistic interpretability for large language models (LLMs). While SAEs have primarily been used for interpretability, we shift focus and explore an understudied question: ``Can SAEs be applied to practical tasks beyond interpretability?'' Given that SAEs are trained on billions of tokens for sparse reconstruction, we believe they can serve as effective extractors, offering a wide range of useful knowledge that can benefit practical applications. Building on this motivation, we demonstrate that SAEs can be effectively applied to in-context learning (ICL). In particular, we highlight the utility of the SAE-reconstruction loss by showing that it provides a valuable signal in ICL{---}exhibiting a strong correlation with LLM performance and offering a powerful unsupervised approach for prompt selection. These findings underscore the versatility of SAEs and reveal their potential for real-world applications beyond interpretability. Our code is available at https://github.com/ihcho2/SAE-GPS."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cho-etal-2025-versatility">
<titleInfo>
<title>On the Versatility of Sparse Autoencoders for In-Context Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ikhyun</namePart>
<namePart type="family">Cho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gaeul</namePart>
<namePart type="family">Kwon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Hockenmaier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-335-7</identifier>
</relatedItem>
<abstract>Sparse autoencoders (SAEs) are emerging as a key analytical tool in the field of mechanistic interpretability for large language models (LLMs). While SAEs have primarily been used for interpretability, we shift focus and explore an understudied question: “Can SAEs be applied to practical tasks beyond interpretability?” Given that SAEs are trained on billions of tokens for sparse reconstruction, we believe they can serve as effective extractors, offering a wide range of useful knowledge that can benefit practical applications. Building on this motivation, we demonstrate that SAEs can be effectively applied to in-context learning (ICL). In particular, we highlight the utility of the SAE-reconstruction loss by showing that it provides a valuable signal in ICL—exhibiting a strong correlation with LLM performance and offering a powerful unsupervised approach for prompt selection. These findings underscore the versatility of SAEs and reveal their potential for real-world applications beyond interpretability. Our code is available at https://github.com/ihcho2/SAE-GPS.</abstract>
<identifier type="citekey">cho-etal-2025-versatility</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.1063/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>19531</start>
<end>19538</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On the Versatility of Sparse Autoencoders for In-Context Learning
%A Cho, Ikhyun
%A Kwon, Gaeul
%A Hockenmaier, Julia
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F cho-etal-2025-versatility
%X Sparse autoencoders (SAEs) are emerging as a key analytical tool in the field of mechanistic interpretability for large language models (LLMs). While SAEs have primarily been used for interpretability, we shift focus and explore an understudied question: “Can SAEs be applied to practical tasks beyond interpretability?” Given that SAEs are trained on billions of tokens for sparse reconstruction, we believe they can serve as effective extractors, offering a wide range of useful knowledge that can benefit practical applications. Building on this motivation, we demonstrate that SAEs can be effectively applied to in-context learning (ICL). In particular, we highlight the utility of the SAE-reconstruction loss by showing that it provides a valuable signal in ICL—exhibiting a strong correlation with LLM performance and offering a powerful unsupervised approach for prompt selection. These findings underscore the versatility of SAEs and reveal their potential for real-world applications beyond interpretability. Our code is available at https://github.com/ihcho2/SAE-GPS.
%U https://aclanthology.org/2025.findings-emnlp.1063/
%P 19531-19538
Markdown (Informal)
[On the Versatility of Sparse Autoencoders for In-Context Learning](https://aclanthology.org/2025.findings-emnlp.1063/) (Cho et al., Findings 2025)