@inproceedings{gao-etal-2021-topic,
title = "Topic Modeling for Maternal Health Using {R}eddit",
author = "Gao, Shuang and
Pandya, Shivani and
Agarwal, Smisha and
Sedoc, Jo{\~a}o",
editor = "Holderness, Eben and
Jimeno Yepes, Antonio and
Lavelli, Alberto and
Minard, Anne-Lyse and
Pustejovsky, James and
Rinaldi, Fabio",
booktitle = "Proceedings of the 12th International Workshop on Health Text Mining and Information Analysis",
month = apr,
year = "2021",
address = "online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.louhi-1.8",
pages = "69--76",
abstract = "This paper applies topic modeling to understand maternal health topics, concerns, and questions expressed in online communities on social networking sites. We examine Latent Dirichlet Analysis (LDA) and two state-of-the-art methods: neural topic model with knowledge distillation (KD) and Embedded Topic Model (ETM) on maternal health texts collected from Reddit. The models are evaluated on topic quality and topic inference, using both auto-evaluation metrics and human assessment. We analyze a disconnect between automatic metrics and human evaluations. While LDA performs the best overall with the auto-evaluation metrics NPMI and Coherence, Neural Topic Model with Knowledge Distillation is favorable by expert evaluation. We also create a new partially expert annotated gold-standard maternal health topic",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gao-etal-2021-topic">
<titleInfo>
<title>Topic Modeling for Maternal Health Using Reddit</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shuang</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shivani</namePart>
<namePart type="family">Pandya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Smisha</namePart>
<namePart type="family">Agarwal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">João</namePart>
<namePart type="family">Sedoc</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th International Workshop on Health Text Mining and Information Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eben</namePart>
<namePart type="family">Holderness</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonio</namePart>
<namePart type="family">Jimeno Yepes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Lavelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anne-Lyse</namePart>
<namePart type="family">Minard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Pustejovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabio</namePart>
<namePart type="family">Rinaldi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper applies topic modeling to understand maternal health topics, concerns, and questions expressed in online communities on social networking sites. We examine Latent Dirichlet Analysis (LDA) and two state-of-the-art methods: neural topic model with knowledge distillation (KD) and Embedded Topic Model (ETM) on maternal health texts collected from Reddit. The models are evaluated on topic quality and topic inference, using both auto-evaluation metrics and human assessment. We analyze a disconnect between automatic metrics and human evaluations. While LDA performs the best overall with the auto-evaluation metrics NPMI and Coherence, Neural Topic Model with Knowledge Distillation is favorable by expert evaluation. We also create a new partially expert annotated gold-standard maternal health topic</abstract>
<identifier type="citekey">gao-etal-2021-topic</identifier>
<location>
<url>https://aclanthology.org/2021.louhi-1.8</url>
</location>
<part>
<date>2021-04</date>
<extent unit="page">
<start>69</start>
<end>76</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Topic Modeling for Maternal Health Using Reddit
%A Gao, Shuang
%A Pandya, Shivani
%A Agarwal, Smisha
%A Sedoc, João
%Y Holderness, Eben
%Y Jimeno Yepes, Antonio
%Y Lavelli, Alberto
%Y Minard, Anne-Lyse
%Y Pustejovsky, James
%Y Rinaldi, Fabio
%S Proceedings of the 12th International Workshop on Health Text Mining and Information Analysis
%D 2021
%8 April
%I Association for Computational Linguistics
%C online
%F gao-etal-2021-topic
%X This paper applies topic modeling to understand maternal health topics, concerns, and questions expressed in online communities on social networking sites. We examine Latent Dirichlet Analysis (LDA) and two state-of-the-art methods: neural topic model with knowledge distillation (KD) and Embedded Topic Model (ETM) on maternal health texts collected from Reddit. The models are evaluated on topic quality and topic inference, using both auto-evaluation metrics and human assessment. We analyze a disconnect between automatic metrics and human evaluations. While LDA performs the best overall with the auto-evaluation metrics NPMI and Coherence, Neural Topic Model with Knowledge Distillation is favorable by expert evaluation. We also create a new partially expert annotated gold-standard maternal health topic
%U https://aclanthology.org/2021.louhi-1.8
%P 69-76
Markdown (Informal)
[Topic Modeling for Maternal Health Using Reddit](https://aclanthology.org/2021.louhi-1.8) (Gao et al., Louhi 2021)
ACL
- Shuang Gao, Shivani Pandya, Smisha Agarwal, and João Sedoc. 2021. Topic Modeling for Maternal Health Using Reddit. In Proceedings of the 12th International Workshop on Health Text Mining and Information Analysis, pages 69–76, online. Association for Computational Linguistics.