@inproceedings{liu-etal-2024-unsupervised,
title = "Unsupervised Hierarchical Topic Modeling via Anchor Word Clustering and Path Guidance",
author = "Liu, Jiyuan and
Chen, Hegang and
Zhu, Chunjiang and
Rao, Yanghui",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.440/",
doi = "10.18653/v1/2024.findings-emnlp.440",
pages = "7505--7517",
abstract = "Hierarchical topic models nowadays tend to capture the relationship between words and topics, often ignoring the role of anchor words that guide text generation. For the first time, we detect and add anchor words to the text generation process in an unsupervised way. Firstly, we adopt a clustering algorithm to adaptively detect anchor words that are highly consistent with every topic, which forms the path of topic $\rightarrow$ anchor word. Secondly, we add the causal path of anchor word $\rightarrow$ word to the popular Variational Auto-Encoder (VAE) framework via implicitly using word co-occurrence graphs. We develop the causal path of topic+anchor word $\rightarrow$ higher-layer topic that aids the expression of topic concepts with anchor words to capture a more semantically tight hierarchical topic structure. Finally, we enhance the model's representation of the anchor words through a novel contrastive learning. After jointly training the aforementioned constraint objectives, we can produce more coherent and diverse topics with a better hierarchical structure. Extensive experiments on three datasets show that our model outperforms state-of-the-art methods."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liu-etal-2024-unsupervised">
<titleInfo>
<title>Unsupervised Hierarchical Topic Modeling via Anchor Word Clustering and Path Guidance</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jiyuan</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hegang</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chunjiang</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanghui</namePart>
<namePart type="family">Rao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Hierarchical topic models nowadays tend to capture the relationship between words and topics, often ignoring the role of anchor words that guide text generation. For the first time, we detect and add anchor words to the text generation process in an unsupervised way. Firstly, we adopt a clustering algorithm to adaptively detect anchor words that are highly consistent with every topic, which forms the path of topic → anchor word. Secondly, we add the causal path of anchor word → word to the popular Variational Auto-Encoder (VAE) framework via implicitly using word co-occurrence graphs. We develop the causal path of topic+anchor word → higher-layer topic that aids the expression of topic concepts with anchor words to capture a more semantically tight hierarchical topic structure. Finally, we enhance the model's representation of the anchor words through a novel contrastive learning. After jointly training the aforementioned constraint objectives, we can produce more coherent and diverse topics with a better hierarchical structure. Extensive experiments on three datasets show that our model outperforms state-of-the-art methods.</abstract>
<identifier type="citekey">liu-etal-2024-unsupervised</identifier>
<identifier type="doi">10.18653/v1/2024.findings-emnlp.440</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.440/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>7505</start>
<end>7517</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Unsupervised Hierarchical Topic Modeling via Anchor Word Clustering and Path Guidance
%A Liu, Jiyuan
%A Chen, Hegang
%A Zhu, Chunjiang
%A Rao, Yanghui
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F liu-etal-2024-unsupervised
%X Hierarchical topic models nowadays tend to capture the relationship between words and topics, often ignoring the role of anchor words that guide text generation. For the first time, we detect and add anchor words to the text generation process in an unsupervised way. Firstly, we adopt a clustering algorithm to adaptively detect anchor words that are highly consistent with every topic, which forms the path of topic → anchor word. Secondly, we add the causal path of anchor word → word to the popular Variational Auto-Encoder (VAE) framework via implicitly using word co-occurrence graphs. We develop the causal path of topic+anchor word → higher-layer topic that aids the expression of topic concepts with anchor words to capture a more semantically tight hierarchical topic structure. Finally, we enhance the model's representation of the anchor words through a novel contrastive learning. After jointly training the aforementioned constraint objectives, we can produce more coherent and diverse topics with a better hierarchical structure. Extensive experiments on three datasets show that our model outperforms state-of-the-art methods.
%R 10.18653/v1/2024.findings-emnlp.440
%U https://aclanthology.org/2024.findings-emnlp.440/
%U https://doi.org/10.18653/v1/2024.findings-emnlp.440
%P 7505-7517
Markdown (Informal)
[Unsupervised Hierarchical Topic Modeling via Anchor Word Clustering and Path Guidance](https://aclanthology.org/2024.findings-emnlp.440/) (Liu et al., Findings 2024)
ACL