@article{gallagher-etal-2017-anchored,
title = "Anchored Correlation Explanation: Topic Modeling with Minimal Domain Knowledge",
author = "Gallagher, Ryan J. and
Reing, Kyle and
Kale, David and
Ver Steeg, Greg",
editor = "Lee, Lillian and
Johnson, Mark and
Toutanova, Kristina",
journal = "Transactions of the Association for Computational Linguistics",
volume = "5",
year = "2017",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/Q17-1037",
doi = "10.1162/tacl_a_00078",
pages = "529--542",
abstract = "While generative models such as Latent Dirichlet Allocation (LDA) have proven fruitful in topic modeling, they often require detailed assumptions and careful specification of hyperparameters. Such model complexity issues only compound when trying to generalize generative models to incorporate human input. We introduce Correlation Explanation (CorEx), an alternative approach to topic modeling that does not assume an underlying generative model, and instead learns maximally informative topics through an information-theoretic framework. This framework naturally generalizes to hierarchical and semi-supervised extensions with no additional modeling assumptions. In particular, word-level domain knowledge can be flexibly incorporated within CorEx through anchor words, allowing topic separability and representation to be promoted with minimal human intervention. Across a variety of datasets, metrics, and experiments, we demonstrate that CorEx produces topics that are comparable in quality to those produced by unsupervised and semi-supervised variants of LDA.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gallagher-etal-2017-anchored">
<titleInfo>
<title>Anchored Correlation Explanation: Topic Modeling with Minimal Domain Knowledge</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="given">J</namePart>
<namePart type="family">Gallagher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyle</namePart>
<namePart type="family">Reing</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Kale</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Greg</namePart>
<namePart type="family">Ver Steeg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>While generative models such as Latent Dirichlet Allocation (LDA) have proven fruitful in topic modeling, they often require detailed assumptions and careful specification of hyperparameters. Such model complexity issues only compound when trying to generalize generative models to incorporate human input. We introduce Correlation Explanation (CorEx), an alternative approach to topic modeling that does not assume an underlying generative model, and instead learns maximally informative topics through an information-theoretic framework. This framework naturally generalizes to hierarchical and semi-supervised extensions with no additional modeling assumptions. In particular, word-level domain knowledge can be flexibly incorporated within CorEx through anchor words, allowing topic separability and representation to be promoted with minimal human intervention. Across a variety of datasets, metrics, and experiments, we demonstrate that CorEx produces topics that are comparable in quality to those produced by unsupervised and semi-supervised variants of LDA.</abstract>
<identifier type="citekey">gallagher-etal-2017-anchored</identifier>
<identifier type="doi">10.1162/tacl_a_00078</identifier>
<location>
<url>https://aclanthology.org/Q17-1037</url>
</location>
<part>
<date>2017</date>
<detail type="volume"><number>5</number></detail>
<extent unit="page">
<start>529</start>
<end>542</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Anchored Correlation Explanation: Topic Modeling with Minimal Domain Knowledge
%A Gallagher, Ryan J.
%A Reing, Kyle
%A Kale, David
%A Ver Steeg, Greg
%J Transactions of the Association for Computational Linguistics
%D 2017
%V 5
%I MIT Press
%C Cambridge, MA
%F gallagher-etal-2017-anchored
%X While generative models such as Latent Dirichlet Allocation (LDA) have proven fruitful in topic modeling, they often require detailed assumptions and careful specification of hyperparameters. Such model complexity issues only compound when trying to generalize generative models to incorporate human input. We introduce Correlation Explanation (CorEx), an alternative approach to topic modeling that does not assume an underlying generative model, and instead learns maximally informative topics through an information-theoretic framework. This framework naturally generalizes to hierarchical and semi-supervised extensions with no additional modeling assumptions. In particular, word-level domain knowledge can be flexibly incorporated within CorEx through anchor words, allowing topic separability and representation to be promoted with minimal human intervention. Across a variety of datasets, metrics, and experiments, we demonstrate that CorEx produces topics that are comparable in quality to those produced by unsupervised and semi-supervised variants of LDA.
%R 10.1162/tacl_a_00078
%U https://aclanthology.org/Q17-1037
%U https://doi.org/10.1162/tacl_a_00078
%P 529-542
Markdown (Informal)
[Anchored Correlation Explanation: Topic Modeling with Minimal Domain Knowledge](https://aclanthology.org/Q17-1037) (Gallagher et al., TACL 2017)
ACL