@inproceedings{chiu-etal-2020-autoencoding,
title = "Autoencoding Keyword Correlation Graph for Document Clustering",
author = "Chiu, Billy and
Sahu, Sunil Kumar and
Thomas, Derek and
Sengupta, Neha and
Mahdy, Mohammady",
editor = "Jurafsky, Dan and
Chai, Joyce and
Schluter, Natalie and
Tetreault, Joel",
booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics",
month = jul,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.acl-main.366",
doi = "10.18653/v1/2020.acl-main.366",
pages = "3974--3981",
abstract = "Document clustering requires a deep understanding of the complex structure of long-text; in particular, the intra-sentential (local) and inter-sentential features (global). Existing representation learning models do not fully capture these features. To address this, we present a novel graph-based representation for document clustering that builds a \textit{graph autoencoder} (GAE) on a Keyword Correlation Graph. The graph is constructed with topical keywords as nodes and multiple local and global features as edges. A GAE is employed to aggregate the two sets of features by learning a latent representation which can jointly reconstruct them. Clustering is then performed on the learned representations, using vector dimensions as features for inducing document classes. Extensive experiments on two datasets show that the features learned by our approach can achieve better clustering performance than other existing features, including term frequency-inverse document frequency and average embedding.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chiu-etal-2020-autoencoding">
<titleInfo>
<title>Autoencoding Keyword Correlation Graph for Document Clustering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Billy</namePart>
<namePart type="family">Chiu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sunil</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Sahu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Derek</namePart>
<namePart type="family">Thomas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Neha</namePart>
<namePart type="family">Sengupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammady</namePart>
<namePart type="family">Mahdy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dan</namePart>
<namePart type="family">Jurafsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Chai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Natalie</namePart>
<namePart type="family">Schluter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joel</namePart>
<namePart type="family">Tetreault</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Document clustering requires a deep understanding of the complex structure of long-text; in particular, the intra-sentential (local) and inter-sentential features (global). Existing representation learning models do not fully capture these features. To address this, we present a novel graph-based representation for document clustering that builds a graph autoencoder (GAE) on a Keyword Correlation Graph. The graph is constructed with topical keywords as nodes and multiple local and global features as edges. A GAE is employed to aggregate the two sets of features by learning a latent representation which can jointly reconstruct them. Clustering is then performed on the learned representations, using vector dimensions as features for inducing document classes. Extensive experiments on two datasets show that the features learned by our approach can achieve better clustering performance than other existing features, including term frequency-inverse document frequency and average embedding.</abstract>
<identifier type="citekey">chiu-etal-2020-autoencoding</identifier>
<identifier type="doi">10.18653/v1/2020.acl-main.366</identifier>
<location>
<url>https://aclanthology.org/2020.acl-main.366</url>
</location>
<part>
<date>2020-07</date>
<extent unit="page">
<start>3974</start>
<end>3981</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Autoencoding Keyword Correlation Graph for Document Clustering
%A Chiu, Billy
%A Sahu, Sunil Kumar
%A Thomas, Derek
%A Sengupta, Neha
%A Mahdy, Mohammady
%Y Jurafsky, Dan
%Y Chai, Joyce
%Y Schluter, Natalie
%Y Tetreault, Joel
%S Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics
%D 2020
%8 July
%I Association for Computational Linguistics
%C Online
%F chiu-etal-2020-autoencoding
%X Document clustering requires a deep understanding of the complex structure of long-text; in particular, the intra-sentential (local) and inter-sentential features (global). Existing representation learning models do not fully capture these features. To address this, we present a novel graph-based representation for document clustering that builds a graph autoencoder (GAE) on a Keyword Correlation Graph. The graph is constructed with topical keywords as nodes and multiple local and global features as edges. A GAE is employed to aggregate the two sets of features by learning a latent representation which can jointly reconstruct them. Clustering is then performed on the learned representations, using vector dimensions as features for inducing document classes. Extensive experiments on two datasets show that the features learned by our approach can achieve better clustering performance than other existing features, including term frequency-inverse document frequency and average embedding.
%R 10.18653/v1/2020.acl-main.366
%U https://aclanthology.org/2020.acl-main.366
%U https://doi.org/10.18653/v1/2020.acl-main.366
%P 3974-3981
Markdown (Informal)
[Autoencoding Keyword Correlation Graph for Document Clustering](https://aclanthology.org/2020.acl-main.366) (Chiu et al., ACL 2020)
ACL