% BibTeX record for the LangVAE / LangSpace paper (URL points to the ACL Anthology).
% Whole-word braces in the title protect LangVAE, LangSpace and VAEs from style recasing.
@inproceedings{carvalho-etal-2025-langvae,
  title     = {{LangVAE} and {LangSpace}: Building and Probing for Language Model {VAEs}},
  author    = {Carvalho, Danilo and
               Zhang, Yingji and
               Unsworth, Harriet and
               Freitas, Andre},
  editor    = {Habernal, Ivan and
               Schulam, Peter and
               Tiedemann, J{\"o}rg},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.emnlp-demos.57/},
  pages     = {749--759},
  isbn      = {979-8-89176-334-0},
  abstract  = {We present LangVAE, a novel framework for modular construction of variational autoencoders (VAEs) on top of pre-trained large language models (LLMs). Such language model VAEs can encode the knowledge of their pre-trained components into more compact and semantically disentangled representations. The representations obtained in this way can be analysed with the LangVAE companion framework: LangSpace, which implements a collection of probing methods, such as vector traversal and interpolation, disentanglement measures, and cluster visualisations. LangVAE and LangSpace offer a flexible, efficient and scalable way of building and analysing textual representations, with simple integration for models available on the HuggingFace Hub. Additionally, we conducted a set of experiments with different encoder and decoder combinations, as well as annotated inputs, revealing a wide range of interactions across architectural families and sizes w.r.t. generalisation and disentanglement. Our findings demonstrate a promising framework for systematising the experimentation and understanding of textual representations.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="carvalho-etal-2025-langvae">
<titleInfo>
<title>LangVAE and LangSpace: Building and Probing for Language Model VAEs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Danilo</namePart>
<namePart type="family">Carvalho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yingji</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harriet</namePart>
<namePart type="family">Unsworth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Freitas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Habernal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Schulam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-334-0</identifier>
</relatedItem>
<abstract>We present LangVAE, a novel framework for modular construction of variational autoencoders (VAEs) on top of pre-trained large language models (LLMs). Such language model VAEs can encode the knowledge of their pre-trained components into more compact and semantically disentangled representations. The representations obtained in this way can be analysed with the LangVAE companion framework: LangSpace, which implements a collection of probing methods, such as vector traversal and interpolation, disentanglement measures, and cluster visualisations. LangVAE and LangSpace offer a flexible, efficient and scalable way of building and analysing textual representations, with simple integration for models available on the HuggingFace Hub. Additionally, we conducted a set of experiments with different encoder and decoder combinations, as well as annotated inputs, revealing a wide range of interactions across architectural families and sizes w.r.t. generalisation and disentanglement. Our findings demonstrate a promising framework for systematising the experimentation and understanding of textual representations.</abstract>
<identifier type="citekey">carvalho-etal-2025-langvae</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-demos.57/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>749</start>
<end>759</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T LangVAE and LangSpace: Building and Probing for Language Model VAEs
%A Carvalho, Danilo
%A Zhang, Yingji
%A Unsworth, Harriet
%A Freitas, Andre
%Y Habernal, Ivan
%Y Schulam, Peter
%Y Tiedemann, Jörg
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: System Demonstrations
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-334-0
%F carvalho-etal-2025-langvae
%X We present LangVAE, a novel framework for modular construction of variational autoencoders (VAEs) on top of pre-trained large language models (LLMs). Such language model VAEs can encode the knowledge of their pre-trained components into more compact and semantically disentangled representations. The representations obtained in this way can be analysed with the LangVAE companion framework: LangSpace, which implements a collection of probing methods, such as vector traversal and interpolation, disentanglement measures, and cluster visualisations. LangVAE and LangSpace offer a flexible, efficient and scalable way of building and analysing textual representations, with simple integration for models available on the HuggingFace Hub. Additionally, we conducted a set of experiments with different encoder and decoder combinations, as well as annotated inputs, revealing a wide range of interactions across architectural families and sizes w.r.t. generalisation and disentanglement. Our findings demonstrate a promising framework for systematising the experimentation and understanding of textual representations.
%U https://aclanthology.org/2025.emnlp-demos.57/
%P 749-759
Markdown (Informal)
[LangVAE and LangSpace: Building and Probing for Language Model VAEs](https://aclanthology.org/2025.emnlp-demos.57/) (Carvalho et al., EMNLP 2025)
ACL