@inproceedings{teimouri-etal-2025-deep,
title = "A Deep Dive into Multi-Head Attention and Multi-Aspect Embedding",
author = "Teimouri, Maryam and
Kanerva, Jenna and
Ginter, Filip",
editor = "Angelova, Galia and
Kunilovskaya, Maria and
Escribe, Marie and
Mitkov, Ruslan",
booktitle = "Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era",
month = sep,
year = "2025",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2025.ranlp-1.146/",
pages = "1263--1270",
abstract = "Multi-vector embedding models play an increasingly important role in retrieval-augmented generation, yet their internal behaviour lacks comprehensive analysis. We conduct a systematic, head-level study of the 32-head Semantic Feature Representation (SFR) encoder with the FineWeb corpus containing 10 billion tokens. For a set of 4,000 web documents, we pair head-specific embeddings with GPT-4o topic annotations and analyse the results using t-SNE visualisations, heat maps, and a 32-way logistic probe. The analysis shows that (i) clear semantic separation between heads emerges only at an intermediate layer, (ii) some heads align with specific topics while others capture broader corpus features, and (iii) naive pooling of head outputs can blur these distinctions, leading to frequent topic mismatches. The study offers practical guidance on where to extract embeddings, which heads may be pruned, and how to aggregate them to support more transparent and controllable retrieval pipelines."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="teimouri-etal-2025-deep">
    <titleInfo>
      <title>A Deep Dive into Multi-Head Attention and Multi-Aspect Embedding</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Maryam</namePart>
      <namePart type="family">Teimouri</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jenna</namePart>
      <namePart type="family">Kanerva</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Filip</namePart>
      <namePart type="family">Ginter</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Kunilovskaya</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marie</namePart>
        <namePart type="family">Escribe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Multi-vector embedding models play an increasingly important role in retrieval-augmented generation, yet their internal behaviour lacks comprehensive analysis. We conduct a systematic, head-level study of the 32-head Semantic Feature Representation (SFR) encoder with the FineWeb corpus containing 10 billion tokens. For a set of 4,000 web documents, we pair head-specific embeddings with GPT-4o topic annotations and analyse the results using t-SNE visualisations, heat maps, and a 32-way logistic probe. The analysis shows that (i) clear semantic separation between heads emerges only at an intermediate layer, (ii) some heads align with specific topics while others capture broader corpus features, and (iii) naive pooling of head outputs can blur these distinctions, leading to frequent topic mismatches. The study offers practical guidance on where to extract embeddings, which heads may be pruned, and how to aggregate them to support more transparent and controllable retrieval pipelines.</abstract>
    <identifier type="citekey">teimouri-etal-2025-deep</identifier>
    <location>
      <url>https://aclanthology.org/2025.ranlp-1.146/</url>
    </location>
    <part>
      <date>2025-09</date>
      <extent unit="page">
        <start>1263</start>
        <end>1270</end>
      </extent>
    </part>
  </mods>
</modsCollection>

%0 Conference Proceedings
%T A Deep Dive into Multi-Head Attention and Multi-Aspect Embedding
%A Teimouri, Maryam
%A Kanerva, Jenna
%A Ginter, Filip
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F teimouri-etal-2025-deep
%X Multi-vector embedding models play an increasingly important role in retrieval-augmented generation, yet their internal behaviour lacks comprehensive analysis. We conduct a systematic, head-level study of the 32-head Semantic Feature Representation (SFR) encoder with the FineWeb corpus containing 10 billion tokens. For a set of 4,000 web documents, we pair head-specific embeddings with GPT-4o topic annotations and analyse the results using t-SNE visualisations, heat maps, and a 32-way logistic probe. The analysis shows that (i) clear semantic separation between heads emerges only at an intermediate layer, (ii) some heads align with specific topics while others capture broader corpus features, and (iii) naive pooling of head outputs can blur these distinctions, leading to frequent topic mismatches. The study offers practical guidance on where to extract embeddings, which heads may be pruned, and how to aggregate them to support more transparent and controllable retrieval pipelines.
%U https://aclanthology.org/2025.ranlp-1.146/
%P 1263-1270
Markdown (Informal)
[A Deep Dive into Multi-Head Attention and Multi-Aspect Embedding](https://aclanthology.org/2025.ranlp-1.146/) (Teimouri et al., RANLP 2025)
ACL
Maryam Teimouri, Jenna Kanerva, and Filip Ginter. 2025. A Deep Dive into Multi-Head Attention and Multi-Aspect Embedding. In Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era, pages 1263–1270, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.