@inproceedings{al-saeedi-harma-2025-emergence,
title = "Emergence of symbolic abstraction heads for in-context learning in large language models",
author = "Al-Saeedi, Ali and
Harma, Aki",
editor = "Liu, Kang and
Song, Yangqiu and
Han, Zhen and
Sifa, Rafet and
He, Shizhu and
Long, Yunfei",
booktitle = "Proceedings of Bridging Neurons and Symbols for Natural Language Processing and Knowledge Graphs Reasoning @ COLING 2025",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2025.neusymbridge-1.9/",
pages = "86--96",
abstract = "Large Language Models (LLMs) based on self-attention circuits are able to perform, at inference time, novel reasoning tasks, but the mechanisms inside the models are currently not fully understood. We assume that LLMs are able to generalize abstract patterns from the input and form an internal symbolic internal representation of the content. In this paper, we study this by analyzing the performance of small LLM models trained with sequences of instantiations of abstract sequential symbolic patterns or templates. It is shown that even a model with two layers is able to learn an abstract template and use it to generate correct output representing the pattern. This can be seen as a form of symbolic inference taking place inside the network. In this paper, we call the emergent mechanism abstraction head. Identifying mechanisms of symbolic reasoning in a neural network can help to find new ways to merge symbolic and neural processing."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="al-saeedi-harma-2025-emergence">
<titleInfo>
<title>Emergence of symbolic abstraction heads for in-context learning in large language models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ali</namePart>
<namePart type="family">Al-Saeedi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aki</namePart>
<namePart type="family">Harma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Bridging Neurons and Symbols for Natural Language Processing and Knowledge Graphs Reasoning @ COLING 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yangqiu</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhen</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rafet</namePart>
<namePart type="family">Sifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shizhu</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yunfei</namePart>
<namePart type="family">Long</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large Language Models (LLMs) based on self-attention circuits are able to perform novel reasoning tasks at inference time, but the mechanisms inside the models are not yet fully understood. We assume that LLMs are able to generalize abstract patterns from the input and form an internal symbolic representation of the content. In this paper, we study this by analyzing the performance of small LLMs trained on sequences of instantiations of abstract sequential symbolic patterns, or templates. We show that even a model with two layers is able to learn an abstract template and use it to generate correct output representing the pattern. This can be seen as a form of symbolic inference taking place inside the network. We call the emergent mechanism an abstraction head. Identifying mechanisms of symbolic reasoning in a neural network can help to find new ways to merge symbolic and neural processing.</abstract>
<identifier type="citekey">al-saeedi-harma-2025-emergence</identifier>
<location>
<url>https://aclanthology.org/2025.neusymbridge-1.9/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>86</start>
<end>96</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Emergence of symbolic abstraction heads for in-context learning in large language models
%A Al-Saeedi, Ali
%A Harma, Aki
%Y Liu, Kang
%Y Song, Yangqiu
%Y Han, Zhen
%Y Sifa, Rafet
%Y He, Shizhu
%Y Long, Yunfei
%S Proceedings of Bridging Neurons and Symbols for Natural Language Processing and Knowledge Graphs Reasoning @ COLING 2025
%D 2025
%8 January
%I ELRA and ICCL
%C Abu Dhabi, UAE
%F al-saeedi-harma-2025-emergence
%X Large Language Models (LLMs) based on self-attention circuits are able to perform novel reasoning tasks at inference time, but the mechanisms inside the models are not yet fully understood. We assume that LLMs are able to generalize abstract patterns from the input and form an internal symbolic representation of the content. In this paper, we study this by analyzing the performance of small LLMs trained on sequences of instantiations of abstract sequential symbolic patterns, or templates. We show that even a model with two layers is able to learn an abstract template and use it to generate correct output representing the pattern. This can be seen as a form of symbolic inference taking place inside the network. We call the emergent mechanism an abstraction head. Identifying mechanisms of symbolic reasoning in a neural network can help to find new ways to merge symbolic and neural processing.
%U https://aclanthology.org/2025.neusymbridge-1.9/
%P 86-96
Markdown (Informal)
[Emergence of symbolic abstraction heads for in-context learning in large language models](https://aclanthology.org/2025.neusymbridge-1.9/) (Al-Saeedi & Harma, NeusymBridge 2025)
ACL
Ali Al-Saeedi and Aki Harma. 2025. Emergence of symbolic abstraction heads for in-context learning in large language models. In Proceedings of Bridging Neurons and Symbols for Natural Language Processing and Knowledge Graphs Reasoning @ COLING 2025, pages 86–96, Abu Dhabi, UAE. ELRA and ICCL.