@inproceedings{zanwar-etal-2022-exploring,
title = "Exploring Hybrid and Ensemble Models for Multiclass Prediction of Mental Health Status on Social Media",
author = "Zanwar, Sourabh and
Wiechmann, Daniel and
Qiao, Yu and
Kerz, Elma",
editor = "Lavelli, Alberto and
Holderness, Eben and
Jimeno Yepes, Antonio and
Minard, Anne-Lyse and
Pustejovsky, James and
Rinaldi, Fabio",
booktitle = "Proceedings of the 13th International Workshop on Health Text Mining and Information Analysis (LOUHI)",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.louhi-1.21",
doi = "10.18653/v1/2022.louhi-1.21",
pages = "184--196",
abstract = "In recent years, there has been a surge of interest in research on automatic mental health detection (MHD) from social media data leveraging advances in natural language processing and machine learning techniques. While significant progress has been achieved in this interdisciplinary research area, the vast majority of work has treated MHD as a binary classification task. The multiclass classification setup is, however, essential if we are to uncover the subtle differences among the statistical patterns of language use associated with particular mental health conditions. Here, we report on experiments aimed at predicting six conditions (anxiety, attention deficit hyperactivity disorder, bipolar disorder, post-traumatic stress disorder, depression, and psychological stress) from Reddit social media posts. We explore and compare the performance of hybrid and ensemble models leveraging transformer-based architectures (BERT and RoBERTa) and BiLSTM neural networks trained on within-text distributions of a diverse set of linguistic features. This set encompasses measures of syntactic complexity, lexical sophistication and diversity, readability, and register-specific ngram frequencies, as well as sentiment and emotion lexicons. In addition, we conduct feature ablation experiments to investigate which types of features are most indicative of particular mental health conditions.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zanwar-etal-2022-exploring">
<titleInfo>
<title>Exploring Hybrid and Ensemble Models for Multiclass Prediction of Mental Health Status on Social Media</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sourabh</namePart>
<namePart type="family">Zanwar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Wiechmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Qiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elma</namePart>
<namePart type="family">Kerz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th International Workshop on Health Text Mining and Information Analysis (LOUHI)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Lavelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eben</namePart>
<namePart type="family">Holderness</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonio</namePart>
<namePart type="family">Jimeno Yepes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anne-Lyse</namePart>
<namePart type="family">Minard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Pustejovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabio</namePart>
<namePart type="family">Rinaldi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In recent years, there has been a surge of interest in research on automatic mental health detection (MHD) from social media data leveraging advances in natural language processing and machine learning techniques. While significant progress has been achieved in this interdisciplinary research area, the vast majority of work has treated MHD as a binary classification task. The multiclass classification setup is, however, essential if we are to uncover the subtle differences among the statistical patterns of language use associated with particular mental health conditions. Here, we report on experiments aimed at predicting six conditions (anxiety, attention deficit hyperactivity disorder, bipolar disorder, post-traumatic stress disorder, depression, and psychological stress) from Reddit social media posts. We explore and compare the performance of hybrid and ensemble models leveraging transformer-based architectures (BERT and RoBERTa) and BiLSTM neural networks trained on within-text distributions of a diverse set of linguistic features. This set encompasses measures of syntactic complexity, lexical sophistication and diversity, readability, and register-specific ngram frequencies, as well as sentiment and emotion lexicons. In addition, we conduct feature ablation experiments to investigate which types of features are most indicative of particular mental health conditions.</abstract>
<identifier type="citekey">zanwar-etal-2022-exploring</identifier>
<identifier type="doi">10.18653/v1/2022.louhi-1.21</identifier>
<location>
<url>https://aclanthology.org/2022.louhi-1.21</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>184</start>
<end>196</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring Hybrid and Ensemble Models for Multiclass Prediction of Mental Health Status on Social Media
%A Zanwar, Sourabh
%A Wiechmann, Daniel
%A Qiao, Yu
%A Kerz, Elma
%Y Lavelli, Alberto
%Y Holderness, Eben
%Y Jimeno Yepes, Antonio
%Y Minard, Anne-Lyse
%Y Pustejovsky, James
%Y Rinaldi, Fabio
%S Proceedings of the 13th International Workshop on Health Text Mining and Information Analysis (LOUHI)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F zanwar-etal-2022-exploring
%X In recent years, there has been a surge of interest in research on automatic mental health detection (MHD) from social media data leveraging advances in natural language processing and machine learning techniques. While significant progress has been achieved in this interdisciplinary research area, the vast majority of work has treated MHD as a binary classification task. The multiclass classification setup is, however, essential if we are to uncover the subtle differences among the statistical patterns of language use associated with particular mental health conditions. Here, we report on experiments aimed at predicting six conditions (anxiety, attention deficit hyperactivity disorder, bipolar disorder, post-traumatic stress disorder, depression, and psychological stress) from Reddit social media posts. We explore and compare the performance of hybrid and ensemble models leveraging transformer-based architectures (BERT and RoBERTa) and BiLSTM neural networks trained on within-text distributions of a diverse set of linguistic features. This set encompasses measures of syntactic complexity, lexical sophistication and diversity, readability, and register-specific ngram frequencies, as well as sentiment and emotion lexicons. In addition, we conduct feature ablation experiments to investigate which types of features are most indicative of particular mental health conditions.
%R 10.18653/v1/2022.louhi-1.21
%U https://aclanthology.org/2022.louhi-1.21
%U https://doi.org/10.18653/v1/2022.louhi-1.21
%P 184-196
Markdown (Informal)
[Exploring Hybrid and Ensemble Models for Multiclass Prediction of Mental Health Status on Social Media](https://aclanthology.org/2022.louhi-1.21) (Zanwar et al., Louhi 2022)
ACL