@inproceedings{kardos-etal-2025-s3,
title = "$S^3$ - Semantic Signal Separation",
author = "Kardos, M{\'a}rton and
Kostkan, Jan and
Enevoldsen, Kenneth and
Vermillet, Arnault-Quentin and
Nielbo, Kristoffer and
Rocca, Roberta",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.32/",
doi = "10.18653/v1/2025.acl-long.32",
pages = "633--666",
ISBN = "979-8-89176-251-0",
abstract = "Topic models are useful tools for discovering latent semantic structures in large textual corpora. Recent efforts have been oriented at incorporating contextual representations in topic modeling and have been shown to outperform classical topic models. These approaches are typically slow, volatile, and require heavy preprocessing for optimal results. We present Semantic Signal Separation ($S^3$), a theory-driven topic modeling approach in neural embedding spaces. $S^3$ conceptualizes topics as independent axes of semantic space and uncovers these by decomposing contextualized document embeddings using Independent Component Analysis. Our approach provides diverse and highly coherent topics, requires no preprocessing, and is demonstrated to be the fastest contextual topic model, being, on average, 4.5x faster than the runner-up BERTopic. We offer an implementation of $S^3$, and all contextual baselines, in the Turftopic Python package."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kardos-etal-2025-s3">
<titleInfo>
<title>S³ - Semantic Signal Separation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Márton</namePart>
<namePart type="family">Kardos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Kostkan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kenneth</namePart>
<namePart type="family">Enevoldsen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arnault-Quentin</namePart>
<namePart type="family">Vermillet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kristoffer</namePart>
<namePart type="family">Nielbo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roberta</namePart>
<namePart type="family">Rocca</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>Topic models are useful tools for discovering latent semantic structures in large textual corpora. Recent efforts have been oriented at incorporating contextual representations in topic modeling and have been shown to outperform classical topic models. These approaches are typically slow, volatile, and require heavy preprocessing for optimal results. We present Semantic Signal Separation (S³), a theory-driven topic modeling approach in neural embedding spaces. S³ conceptualizes topics as independent axes of semantic space and uncovers these by decomposing contextualized document embeddings using Independent Component Analysis. Our approach provides diverse and highly coherent topics, requires no preprocessing, and is demonstrated to be the fastest contextual topic model, being, on average, 4.5x faster than the runner-up BERTopic. We offer an implementation of S³, and all contextual baselines, in the Turftopic Python package.</abstract>
<identifier type="citekey">kardos-etal-2025-s3</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.32</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.32/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>633</start>
<end>666</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T S³ - Semantic Signal Separation
%A Kardos, Márton
%A Kostkan, Jan
%A Enevoldsen, Kenneth
%A Vermillet, Arnault-Quentin
%A Nielbo, Kristoffer
%A Rocca, Roberta
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F kardos-etal-2025-s3
%X Topic models are useful tools for discovering latent semantic structures in large textual corpora. Recent efforts have been oriented at incorporating contextual representations in topic modeling and have been shown to outperform classical topic models. These approaches are typically slow, volatile, and require heavy preprocessing for optimal results. We present Semantic Signal Separation (S³), a theory-driven topic modeling approach in neural embedding spaces. S³ conceptualizes topics as independent axes of semantic space and uncovers these by decomposing contextualized document embeddings using Independent Component Analysis. Our approach provides diverse and highly coherent topics, requires no preprocessing, and is demonstrated to be the fastest contextual topic model, being, on average, 4.5x faster than the runner-up BERTopic. We offer an implementation of S³, and all contextual baselines, in the Turftopic Python package.
%R 10.18653/v1/2025.acl-long.32
%U https://aclanthology.org/2025.acl-long.32/
%U https://doi.org/10.18653/v1/2025.acl-long.32
%P 633-666
Markdown (Informal)
[S3 - Semantic Signal Separation](https://aclanthology.org/2025.acl-long.32/) (Kardos et al., ACL 2025)
ACL
- Márton Kardos, Jan Kostkan, Kenneth Enevoldsen, Arnault-Quentin Vermillet, Kristoffer Nielbo, and Roberta Rocca. 2025. S3 - Semantic Signal Separation. In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 633–666, Vienna, Austria. Association for Computational Linguistics.