@article{sridhar-etal-2022-heterogeneous,
title = "Heterogeneous Supervised Topic Models",
author = "Sridhar, Dhanya and
Daum{\'e} III, Hal and
Blei, David",
editor = "Roark, Brian and
Nenkova, Ani",
journal = "Transactions of the Association for Computational Linguistics",
volume = "10",
year = "2022",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2022.tacl-1.42",
doi = "10.1162/tacl_a_00487",
pages = "732--745",
abstract = "Researchers in the social sciences are often interested in the relationship between text and an outcome of interest, where the goal is to both uncover latent patterns in the text and predict outcomes for unseen texts. To this end, this paper develops the heterogeneous supervised topic model (HSTM), a probabilistic approach to text analysis and prediction. HSTMs posit a joint model of text and outcomes to find heterogeneous patterns that help with both text analysis and prediction. The main benefit of HSTMs is that they capture heterogeneity in the relationship between text and the outcome across latent topics. To fit HSTMs, we develop a variational inference algorithm based on the auto-encoding variational Bayes framework. We study the performance of HSTMs on eight datasets and find that they consistently outperform related methods, including fine-tuned black-box models. Finally, we apply HSTMs to analyze news articles labeled with pro- or anti-tone. We find evidence of differing language used to signal a pro- and anti-tone.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sridhar-etal-2022-heterogeneous">
<titleInfo>
<title>Heterogeneous Supervised Topic Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dhanya</namePart>
<namePart type="family">Sridhar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hal</namePart>
<namePart type="family">Daumé III</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Blei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Researchers in the social sciences are often interested in the relationship between text and an outcome of interest, where the goal is to both uncover latent patterns in the text and predict outcomes for unseen texts. To this end, this paper develops the heterogeneous supervised topic model (HSTM), a probabilistic approach to text analysis and prediction. HSTMs posit a joint model of text and outcomes to find heterogeneous patterns that help with both text analysis and prediction. The main benefit of HSTMs is that they capture heterogeneity in the relationship between text and the outcome across latent topics. To fit HSTMs, we develop a variational inference algorithm based on the auto-encoding variational Bayes framework. We study the performance of HSTMs on eight datasets and find that they consistently outperform related methods, including fine-tuned black-box models. Finally, we apply HSTMs to analyze news articles labeled with pro- or anti-tone. We find evidence of differing language used to signal a pro- and anti-tone.</abstract>
<identifier type="citekey">sridhar-etal-2022-heterogeneous</identifier>
<identifier type="doi">10.1162/tacl_a_00487</identifier>
<location>
<url>https://aclanthology.org/2022.tacl-1.42</url>
</location>
<part>
<date>2022</date>
<detail type="volume"><number>10</number></detail>
<extent unit="page">
<start>732</start>
<end>745</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Heterogeneous Supervised Topic Models
%A Sridhar, Dhanya
%A Daumé III, Hal
%A Blei, David
%J Transactions of the Association for Computational Linguistics
%D 2022
%V 10
%I MIT Press
%C Cambridge, MA
%F sridhar-etal-2022-heterogeneous
%X Researchers in the social sciences are often interested in the relationship between text and an outcome of interest, where the goal is to both uncover latent patterns in the text and predict outcomes for unseen texts. To this end, this paper develops the heterogeneous supervised topic model (HSTM), a probabilistic approach to text analysis and prediction. HSTMs posit a joint model of text and outcomes to find heterogeneous patterns that help with both text analysis and prediction. The main benefit of HSTMs is that they capture heterogeneity in the relationship between text and the outcome across latent topics. To fit HSTMs, we develop a variational inference algorithm based on the auto-encoding variational Bayes framework. We study the performance of HSTMs on eight datasets and find that they consistently outperform related methods, including fine-tuned black-box models. Finally, we apply HSTMs to analyze news articles labeled with pro- or anti-tone. We find evidence of differing language used to signal a pro- and anti-tone.
%R 10.1162/tacl_a_00487
%U https://aclanthology.org/2022.tacl-1.42
%U https://doi.org/10.1162/tacl_a_00487
%P 732-745
Markdown (Informal)
[Heterogeneous Supervised Topic Models](https://aclanthology.org/2022.tacl-1.42) (Sridhar et al., TACL 2022)
ACL