% Conference paper from the EMNLP 2024 proceedings (see booktitle / url / doi below).
% Field values are brace-delimited; `month` uses the predefined three-letter macro;
% the apostrophe in the fifth author's surname is written as the ASCII group {'}.
@inproceedings{hengle-etal-2024-still,
  author    = {Hengle, Amey and
               Kulkarni, Atharva and
               Patankar, Shantanu Deepak and
               Chandrasekaran, Madhumitha and
               D{'}silva, Sneha and
               Jacob, Jemima S. and
               Gupta, Rashmi},
  title     = {Still Not Quite There! Evaluating Large Language Models for Comorbid Mental Health Diagnosis},
  editor    = {Al-Onaizan, Yaser and
               Bansal, Mohit and
               Chen, Yun-Nung},
  booktitle = {Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},
  year      = {2024},
  month     = nov,
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {16698--16721},
  doi       = {10.18653/v1/2024.emnlp-main.931},
  url       = {https://aclanthology.org/2024.emnlp-main.931/},
  abstract  = {In this study, we introduce ANGST, a novel, first of its kind benchmark for depression-anxiety comorbidity classification from social media posts. Unlike contemporary datasets that often oversimplify the intricate interplay between different mental health disorders by treating them as isolated conditions, ANGST enables multi-label classification, allowing each post to be simultaneously identified as indicating depression and/or anxiety. Comprising 2876 meticulously annotated posts by expert psychologists and an additional 7667 silver-labeled posts, ANGST posits a more representative sample of online mental health discourse. Moreover, we benchmark ANGST using various state-of-the-art language models, ranging from Mental-BERT to GPT-4. Our results provide significant insights into the capabilities and limitations of these models in complex diagnostic scenarios. While GPT-4 generally outperforms other models, none achieve an F1 score exceeding 72{\%} in multi-class comorbid classification, underscoring the ongoing challenges in applying language models to mental health diagnostics.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hengle-etal-2024-still">
<titleInfo>
<title>Still Not Quite There! Evaluating Large Language Models for Comorbid Mental Health Diagnosis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amey</namePart>
<namePart type="family">Hengle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atharva</namePart>
<namePart type="family">Kulkarni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shantanu</namePart>
<namePart type="given">Deepak</namePart>
<namePart type="family">Patankar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Madhumitha</namePart>
<namePart type="family">Chandrasekaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sneha</namePart>
<namePart type="family">D’silva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jemima</namePart>
<namePart type="given">S</namePart>
<namePart type="family">Jacob</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rashmi</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this study, we introduce ANGST, a novel, first of its kind benchmark for depression-anxiety comorbidity classification from social media posts. Unlike contemporary datasets that often oversimplify the intricate interplay between different mental health disorders by treating them as isolated conditions, ANGST enables multi-label classification, allowing each post to be simultaneously identified as indicating depression and/or anxiety. Comprising 2876 meticulously annotated posts by expert psychologists and an additional 7667 silver-labeled posts, ANGST posits a more representative sample of online mental health discourse. Moreover, we benchmark ANGST using various state-of-the-art language models, ranging from Mental-BERT to GPT-4. Our results provide significant insights into the capabilities and limitations of these models in complex diagnostic scenarios. While GPT-4 generally outperforms other models, none achieve an F1 score exceeding 72% in multi-class comorbid classification, underscoring the ongoing challenges in applying language models to mental health diagnostics.</abstract>
<identifier type="citekey">hengle-etal-2024-still</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.931</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.931/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>16698</start>
<end>16721</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Still Not Quite There! Evaluating Large Language Models for Comorbid Mental Health Diagnosis
%A Hengle, Amey
%A Kulkarni, Atharva
%A Patankar, Shantanu Deepak
%A Chandrasekaran, Madhumitha
%A D’silva, Sneha
%A Jacob, Jemima S.
%A Gupta, Rashmi
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F hengle-etal-2024-still
%X In this study, we introduce ANGST, a novel, first of its kind benchmark for depression-anxiety comorbidity classification from social media posts. Unlike contemporary datasets that often oversimplify the intricate interplay between different mental health disorders by treating them as isolated conditions, ANGST enables multi-label classification, allowing each post to be simultaneously identified as indicating depression and/or anxiety. Comprising 2876 meticulously annotated posts by expert psychologists and an additional 7667 silver-labeled posts, ANGST posits a more representative sample of online mental health discourse. Moreover, we benchmark ANGST using various state-of-the-art language models, ranging from Mental-BERT to GPT-4. Our results provide significant insights into the capabilities and limitations of these models in complex diagnostic scenarios. While GPT-4 generally outperforms other models, none achieve an F1 score exceeding 72% in multi-class comorbid classification, underscoring the ongoing challenges in applying language models to mental health diagnostics.
%R 10.18653/v1/2024.emnlp-main.931
%U https://aclanthology.org/2024.emnlp-main.931/
%U https://doi.org/10.18653/v1/2024.emnlp-main.931
%P 16698-16721
Markdown (Informal)
[Still Not Quite There! Evaluating Large Language Models for Comorbid Mental Health Diagnosis](https://aclanthology.org/2024.emnlp-main.931/) (Hengle et al., EMNLP 2024)
ACL
- Amey Hengle, Atharva Kulkarni, Shantanu Deepak Patankar, Madhumitha Chandrasekaran, Sneha D’silva, Jemima S. Jacob, and Rashmi Gupta. 2024. Still Not Quite There! Evaluating Large Language Models for Comorbid Mental Health Diagnosis. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pages 16698–16721, Miami, Florida, USA. Association for Computational Linguistics.