@inproceedings{xu-etal-2022-understanding-narratives,
title = "Understanding Narratives from Demographic Survey Data: a Comparative Study with Multiple Neural Topic Models",
author = "Xu, Xiao and
Stulp, Gert and
Van Den Bosch, Antal and
Gauthier, Anne",
editor = "Bamman, David and
Hovy, Dirk and
Jurgens, David and
Keith, Katherine and
O'Connor, Brendan and
Volkova, Svitlana",
booktitle = "Proceedings of the Fifth Workshop on Natural Language Processing and Computational Social Science (NLP+CSS)",
month = nov,
year = "2022",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.nlpcss-1.4",
doi = "10.18653/v1/2022.nlpcss-1.4",
pages = "33--38",
abstract = "Fertility intentions as verbalized in surveys are a poor predictor of actual fertility outcomes, the number of children people have. This can partly be explained by the uncertainty people have in their intentions. Such uncertainties are hard to capture through traditional survey questions, although open-ended questions can be used to get insight into people{'}s subjective narratives of the future that determine their intentions. Analyzing such answers to open-ended questions can be done through Natural Language Processing techniques. Traditional topic models (e.g., LSA and LDA), however, often fail to do since they rely on co-occurrences, which are often rare in short survey responses. The aim of this study was to apply and evaluate topic models on demographic survey data. In this study, we applied neural topic models (e.g. BERTopic, CombinedTM) based on language models to responses from Dutch women on their fertility plans, and compared the topics and their coherence scores from each model to expert judgments. Our results show that neural models produce topics more in line with human interpretation compared to LDA. However, the coherence score could only partly reflect on this, depending on the corpus used for calculation. This research is important because, first, it helps us develop more informed strategies on model selection and evaluation for topic modeling on survey data; and second, it shows that the field of demography has much to gain from adopting NLP methods.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xu-etal-2022-understanding-narratives">
<titleInfo>
<title>Understanding Narratives from Demographic Survey Data: a Comparative Study with Multiple Neural Topic Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xiao</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gert</namePart>
<namePart type="family">Stulp</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antal</namePart>
<namePart type="family">Van Den Bosch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anne</namePart>
<namePart type="family">Gauthier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on Natural Language Processing and Computational Social Science (NLP+CSS)</title>
</titleInfo>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Bamman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dirk</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katherine</namePart>
<namePart type="family">Keith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brendan</namePart>
<namePart type="family">O’Connor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Svitlana</namePart>
<namePart type="family">Volkova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Fertility intentions as verbalized in surveys are a poor predictor of actual fertility outcomes, the number of children people have. This can partly be explained by the uncertainty people have in their intentions. Such uncertainties are hard to capture through traditional survey questions, although open-ended questions can be used to get insight into people’s subjective narratives of the future that determine their intentions. Analyzing such answers to open-ended questions can be done through Natural Language Processing techniques. Traditional topic models (e.g., LSA and LDA), however, often fail to do since they rely on co-occurrences, which are often rare in short survey responses. The aim of this study was to apply and evaluate topic models on demographic survey data. In this study, we applied neural topic models (e.g. BERTopic, CombinedTM) based on language models to responses from Dutch women on their fertility plans, and compared the topics and their coherence scores from each model to expert judgments. Our results show that neural models produce topics more in line with human interpretation compared to LDA. However, the coherence score could only partly reflect on this, depending on the corpus used for calculation. This research is important because, first, it helps us develop more informed strategies on model selection and evaluation for topic modeling on survey data; and second, it shows that the field of demography has much to gain from adopting NLP methods.</abstract>
<identifier type="citekey">xu-etal-2022-understanding-narratives</identifier>
<identifier type="doi">10.18653/v1/2022.nlpcss-1.4</identifier>
<location>
<url>https://aclanthology.org/2022.nlpcss-1.4</url>
</location>
<part>
<date>2022-11</date>
<extent unit="page">
<start>33</start>
<end>38</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Understanding Narratives from Demographic Survey Data: a Comparative Study with Multiple Neural Topic Models
%A Xu, Xiao
%A Stulp, Gert
%A Van Den Bosch, Antal
%A Gauthier, Anne
%Y Bamman, David
%Y Hovy, Dirk
%Y Jurgens, David
%Y Keith, Katherine
%Y O’Connor, Brendan
%Y Volkova, Svitlana
%S Proceedings of the Fifth Workshop on Natural Language Processing and Computational Social Science (NLP+CSS)
%D 2022
%8 November
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F xu-etal-2022-understanding-narratives
%X Fertility intentions as verbalized in surveys are a poor predictor of actual fertility outcomes, the number of children people have. This can partly be explained by the uncertainty people have in their intentions. Such uncertainties are hard to capture through traditional survey questions, although open-ended questions can be used to get insight into people’s subjective narratives of the future that determine their intentions. Analyzing such answers to open-ended questions can be done through Natural Language Processing techniques. Traditional topic models (e.g., LSA and LDA), however, often fail to do since they rely on co-occurrences, which are often rare in short survey responses. The aim of this study was to apply and evaluate topic models on demographic survey data. In this study, we applied neural topic models (e.g. BERTopic, CombinedTM) based on language models to responses from Dutch women on their fertility plans, and compared the topics and their coherence scores from each model to expert judgments. Our results show that neural models produce topics more in line with human interpretation compared to LDA. However, the coherence score could only partly reflect on this, depending on the corpus used for calculation. This research is important because, first, it helps us develop more informed strategies on model selection and evaluation for topic modeling on survey data; and second, it shows that the field of demography has much to gain from adopting NLP methods.
%R 10.18653/v1/2022.nlpcss-1.4
%U https://aclanthology.org/2022.nlpcss-1.4
%U https://doi.org/10.18653/v1/2022.nlpcss-1.4
%P 33-38
Markdown (Informal)
[Understanding Narratives from Demographic Survey Data: a Comparative Study with Multiple Neural Topic Models](https://aclanthology.org/2022.nlpcss-1.4) (Xu et al., NLP+CSS 2022)
ACL