@inproceedings{vu-etal-2025-topic,
title = "Topic Modeling for Short Texts via Optimal Transport-Based Clustering",
author = "Vu, Tu and
Do, Manh and
Nguyen, Tung and
Van, Linh Ngo and
Dinh, Sang and
Nguyen, Thien Huu",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.398/",
doi = "10.18653/v1/2025.findings-acl.398",
pages = "7666--7680",
ISBN = "979-8-89176-256-5",
abstract = "Discovering topics and learning document representations in topic space are two crucial aspects of topic modeling, particularly in the short-text setting, where inferring topic proportions for individual documents is highly challenging. Despite significant progress in neural topic modeling, effectively distinguishing document representations as well as topic embeddings remains an open problem. In this paper, we propose a novel method called **En**hancing Global **C**lustering with **O**ptimal **T**ransport in Topic Modeling (EnCOT). Our approach utilizes an abstract global clusters concept to capture global information and then employs the Optimal Transport framework to align document representations in the topic space with global clusters, while also aligning global clusters with topics. This dual alignment not only enhances the separation of documents in the topic space but also facilitates learning of latent topics. Through extensive experiments, we demonstrate that our method outperforms state-of-the-art techniques in short-text topic modeling across commonly used metrics."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vu-etal-2025-topic">
<titleInfo>
<title>Topic Modeling for Short Texts via Optimal Transport-Based Clustering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tu</namePart>
<namePart type="family">Vu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manh</namePart>
<namePart type="family">Do</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tung</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Linh</namePart>
<namePart type="given">Ngo</namePart>
<namePart type="family">Van</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sang</namePart>
<namePart type="family">Dinh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thien</namePart>
<namePart type="given">Huu</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Discovering topics and learning document representations in topic space are two crucial aspects of topic modeling, particularly in the short-text setting, where inferring topic proportions for individual documents is highly challenging. Despite significant progress in neural topic modeling, effectively distinguishing document representations as well as topic embeddings remains an open problem. In this paper, we propose a novel method called **En**hancing Global **C**lustering with **O**ptimal **T**ransport in Topic Modeling (EnCOT). Our approach utilizes an abstract global clusters concept to capture global information and then employs the Optimal Transport framework to align document representations in the topic space with global clusters, while also aligning global clusters with topics. This dual alignment not only enhances the separation of documents in the topic space but also facilitates learning of latent topics. Through extensive experiments, we demonstrate that our method outperforms state-of-the-art techniques in short-text topic modeling across commonly used metrics.</abstract>
<identifier type="citekey">vu-etal-2025-topic</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.398</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.398/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>7666</start>
<end>7680</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Topic Modeling for Short Texts via Optimal Transport-Based Clustering
%A Vu, Tu
%A Do, Manh
%A Nguyen, Tung
%A Van, Linh Ngo
%A Dinh, Sang
%A Nguyen, Thien Huu
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F vu-etal-2025-topic
%X Discovering topics and learning document representations in topic space are two crucial aspects of topic modeling, particularly in the short-text setting, where inferring topic proportions for individual documents is highly challenging. Despite significant progress in neural topic modeling, effectively distinguishing document representations as well as topic embeddings remains an open problem. In this paper, we propose a novel method called **En**hancing Global **C**lustering with **O**ptimal **T**ransport in Topic Modeling (EnCOT). Our approach utilizes an abstract global clusters concept to capture global information and then employs the Optimal Transport framework to align document representations in the topic space with global clusters, while also aligning global clusters with topics. This dual alignment not only enhances the separation of documents in the topic space but also facilitates learning of latent topics. Through extensive experiments, we demonstrate that our method outperforms state-of-the-art techniques in short-text topic modeling across commonly used metrics.
%R 10.18653/v1/2025.findings-acl.398
%U https://aclanthology.org/2025.findings-acl.398/
%U https://doi.org/10.18653/v1/2025.findings-acl.398
%P 7666-7680
Markdown (Informal)
[Topic Modeling for Short Texts via Optimal Transport-Based Clustering](https://aclanthology.org/2025.findings-acl.398/) (Vu et al., Findings 2025)
ACL