@article{nguyen-etal-2015-improving-topic,
title = "Improving Topic Models with Latent Feature Word Representations",
author = "Nguyen, Dat Quoc and
Billingsley, Richard and
Du, Lan and
Johnson, Mark",
editor = "Collins, Michael and
Lee, Lillian",
journal = "Transactions of the Association for Computational Linguistics",
volume = "3",
year = "2015",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/Q15-1022",
doi = "10.1162/tacl_a_00140",
pages = "299--313",
abstract = "Probabilistic topic models are widely used to discover latent topics in document collections, while latent feature vector representations of words have been used to obtain high performance in many NLP tasks. In this paper, we extend two different Dirichlet multinomial topic models by incorporating latent feature vector representations of words trained on very large corpora to improve the word-topic mapping learnt on a smaller corpus. Experimental results show that by using information from the external corpora, our new models produce significant improvements on topic coherence, document clustering and document classification tasks, especially on datasets with few or short documents.",
}
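For readers skimming the record, here is a minimal sketch of the modelling idea the abstract describes: mixing a corpus-estimated Dirichlet-multinomial topic-word distribution with a latent-feature component built from word vectors pre-trained on a large external corpus. This is an illustrative reconstruction, not the authors' code or exact notation; the names `lam`, `topic_vec`, `word_vecs`, and `phi_t` are assumptions made for the example.

```python
import numpy as np

# Illustrative only: the per-topic word distribution is modelled as a mixture of
# (a) a Dirichlet-multinomial component estimated on the (small) target corpus and
# (b) a latent-feature component, a softmax over dot products between a topic
# embedding and pre-trained word embeddings from a large external corpus.

rng = np.random.default_rng(0)

V = 1000   # vocabulary size
D = 50     # dimensionality of the pre-trained word vectors

word_vecs = rng.normal(size=(V, D))       # stand-in for externally trained vectors
topic_vec = rng.normal(size=D)            # latent feature vector for one topic
phi_t = rng.dirichlet(np.full(V, 0.01))   # Dirichlet-multinomial topic-word probs
lam = 0.6                                 # assumed mixture weight

def softmax(x):
    x = x - x.max()
    e = np.exp(x)
    return e / e.sum()

# Latent-feature component: softmax over topic-word dot products.
feature_component = softmax(word_vecs @ topic_vec)

# Mixed topic-word distribution (a two-component mixture).
p_word_given_topic = lam * feature_component + (1.0 - lam) * phi_t

assert np.isclose(p_word_given_topic.sum(), 1.0)
print(p_word_given_topic[:5])
```

In the paper itself, each word token is attributed to one of the two components during inference; the sketch above only shows the resulting per-topic word distribution, under the assumptions stated in the comments.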
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nguyen-etal-2015-improving-topic">
<titleInfo>
<title>Improving Topic Models with Latent Feature Word Representations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dat</namePart>
<namePart type="given">Quoc</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Billingsley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lan</namePart>
<namePart type="family">Du</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Johnson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2015</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Probabilistic topic models are widely used to discover latent topics in document collections, while latent feature vector representations of words have been used to obtain high performance in many NLP tasks. In this paper, we extend two different Dirichlet multinomial topic models by incorporating latent feature vector representations of words trained on very large corpora to improve the word-topic mapping learnt on a smaller corpus. Experimental results show that by using information from the external corpora, our new models produce significant improvements on topic coherence, document clustering and document classification tasks, especially on datasets with few or short documents.</abstract>
<identifier type="citekey">nguyen-etal-2015-improving-topic</identifier>
<identifier type="doi">10.1162/tacl_a_00140</identifier>
<location>
<url>https://aclanthology.org/Q15-1022</url>
</location>
<part>
<date>2015</date>
<detail type="volume"><number>3</number></detail>
<extent unit="page">
<start>299</start>
<end>313</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Improving Topic Models with Latent Feature Word Representations
%A Nguyen, Dat Quoc
%A Billingsley, Richard
%A Du, Lan
%A Johnson, Mark
%J Transactions of the Association for Computational Linguistics
%D 2015
%V 3
%I MIT Press
%C Cambridge, MA
%F nguyen-etal-2015-improving-topic
%X Probabilistic topic models are widely used to discover latent topics in document collections, while latent feature vector representations of words have been used to obtain high performance in many NLP tasks. In this paper, we extend two different Dirichlet multinomial topic models by incorporating latent feature vector representations of words trained on very large corpora to improve the word-topic mapping learnt on a smaller corpus. Experimental results show that by using information from the external corpora, our new models produce significant improvements on topic coherence, document clustering and document classification tasks, especially on datasets with few or short documents.
%R 10.1162/tacl_a_00140
%U https://aclanthology.org/Q15-1022
%U https://doi.org/10.1162/tacl_a_00140
%P 299-313
Markdown (Informal)
[Improving Topic Models with Latent Feature Word Representations](https://aclanthology.org/Q15-1022) (Nguyen et al., TACL 2015)
ACL
Dat Quoc Nguyen, Richard Billingsley, Lan Du, and Mark Johnson. 2015. Improving Topic Models with Latent Feature Word Representations. Transactions of the Association for Computational Linguistics, 3:299–313.