@inproceedings{li-etal-2025-explicit,
title = "Explicit {B}ayesian Inference to Uncover the Latent Themes of Large Language Models",
author = "Li, Raymond and
Li, Chuyuan and
Murray, Gabriel and
Carenini, Giuseppe",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.1123/",
doi = "10.18653/v1/2025.findings-acl.1123",
pages = "21819--21833",
ISBN = "979-8-89176-256-5",
abstract = "Large language models (LLMs) have demonstrated impressive generative capabilities, yet their inner mechanisms remain largely opaque. In this work, we introduce a novel approach to interpret LLMs{'} generation process through the lens of an explicit Bayesian framework by inferring latent topic variables via variational inference. Specifically, we leverage a variational autoencoder-based neural topic model to dynamically approximate the posterior distribution over the high-level latent topic variables at each generation step. By reconstructing the LLM{'}s next-token predictions through these latent topics and maintaining a regularized latent space, our method not only yields interpretable and diverse topic representations but also has the ability to effectively capture semantic shifts throughout the text. We validate our approach on multiple datasets, showing that our latent topics outperform state-of-the-art topic models on intrinsic measures of coherence and diversity. Furthermore, we demonstrate the utility of our approach in downstream applications by using the inferred topic distributions to retrieve relevant demonstration examples for in-context learning, resulting in significant gains on classification and summarization tasks."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2025-explicit">
<titleInfo>
<title>Explicit Bayesian Inference to Uncover the Latent Themes of Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Raymond</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chuyuan</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gabriel</namePart>
<namePart type="family">Murray</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giuseppe</namePart>
<namePart type="family">Carenini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Large language models (LLMs) have demonstrated impressive generative capabilities, yet their inner mechanisms remain largely opaque. In this work, we introduce a novel approach to interpret LLMs’ generation process through the lens of an explicit Bayesian framework by inferring latent topic variables via variational inference. Specifically, we leverage a variational autoencoder-based neural topic model to dynamically approximate the posterior distribution over the high-level latent topic variables at each generation step. By reconstructing the LLM’s next-token predictions through these latent topics and maintaining a regularized latent space, our method not only yields interpretable and diverse topic representations but also has the ability to effectively capture semantic shifts throughout the text. We validate our approach on multiple datasets, showing that our latent topics outperform state-of-the-art topic models on intrinsic measures of coherence and diversity. Furthermore, we demonstrate the utility of our approach in downstream applications by using the inferred topic distributions to retrieve relevant demonstration examples for in-context learning, resulting in significant gains on classification and summarization tasks.</abstract>
<identifier type="citekey">li-etal-2025-explicit</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.1123</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.1123/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>21819</start>
<end>21833</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Explicit Bayesian Inference to Uncover the Latent Themes of Large Language Models
%A Li, Raymond
%A Li, Chuyuan
%A Murray, Gabriel
%A Carenini, Giuseppe
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F li-etal-2025-explicit
%X Large language models (LLMs) have demonstrated impressive generative capabilities, yet their inner mechanisms remain largely opaque. In this work, we introduce a novel approach to interpret LLMs’ generation process through the lens of an explicit Bayesian framework by inferring latent topic variables via variational inference. Specifically, we leverage a variational autoencoder-based neural topic model to dynamically approximate the posterior distribution over the high-level latent topic variables at each generation step. By reconstructing the LLM’s next-token predictions through these latent topics and maintaining a regularized latent space, our method not only yields interpretable and diverse topic representations but also has the ability to effectively capture semantic shifts throughout the text. We validate our approach on multiple datasets, showing that our latent topics outperform state-of-the-art topic models on intrinsic measures of coherence and diversity. Furthermore, we demonstrate the utility of our approach in downstream applications by using the inferred topic distributions to retrieve relevant demonstration examples for in-context learning, resulting in significant gains on classification and summarization tasks.
%R 10.18653/v1/2025.findings-acl.1123
%U https://aclanthology.org/2025.findings-acl.1123/
%U https://doi.org/10.18653/v1/2025.findings-acl.1123
%P 21819-21833
Markdown (Informal)
[Explicit Bayesian Inference to Uncover the Latent Themes of Large Language Models](https://aclanthology.org/2025.findings-acl.1123/) (Li et al., Findings 2025)
ACL