@inproceedings{preiss-chen-2024-incorporating,
title = "Incorporating Word Count Information into Depression Risk Summary Generation: {INF}@{U}o{S} {CLP}sych 2024 Submission",
author = "Preiss, Judita and
Chen, Zenan",
editor = "Yates, Andrew and
Desmet, Bart and
Prud{'}hommeaux, Emily and
Zirikly, Ayah and
Bedrick, Steven and
MacAvaney, Sean and
Bar, Kfir and
Ireland, Molly and
Ophir, Yaakov",
booktitle = "Proceedings of the 9th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2024)",
month = mar,
year = "2024",
address = "St. Julians, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.clpsych-1.19",
pages = "211--217",
abstract = "Large language model classifiers do not directly offer transparency: it is not clear why one class is chosen over another. In this work, summaries explaining the suicide risk level assigned using a fine-tuned mental-roberta-base model are generated from key phrases extracted using SHAP explainability using Mistral-7B. The training data for the classifier consists of all Reddit posts of a user in the University of Maryland Reddit Suicidality Dataset, Version 2, with their suicide risk labels along with selected features extracted from each post by the Linguistic Inquiry and Word Count (LIWC-22) tool. The resulting model is used to make predictions regarding risk on each post of the users in the evaluation set of the CLPsych 2024 shared task, with a SHAP explainer used to identify the phrases contributing to the top scoring, correct and severe risk categories. Some basic stoplisting is applied to the extracted phrases, along with length based filtering, and a locally run version of Mistral-7B-Instruct-v0.1 is used to create summaries from the highest value (based on SHAP) phrases.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="preiss-chen-2024-incorporating">
<titleInfo>
<title>Incorporating Word Count Information into Depression Risk Summary Generation: INF@UoS CLPsych 2024 Submission</title>
</titleInfo>
<name type="personal">
<namePart type="given">Judita</namePart>
<namePart type="family">Preiss</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zenan</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 9th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Yates</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bart</namePart>
<namePart type="family">Desmet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Prud’hommeaux</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ayah</namePart>
<namePart type="family">Zirikly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bedrick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sean</namePart>
<namePart type="family">MacAvaney</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kfir</namePart>
<namePart type="family">Bar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Molly</namePart>
<namePart type="family">Ireland</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yaakov</namePart>
<namePart type="family">Ophir</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julians, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large language model classifiers do not directly offer transparency: it is not clear why one class is chosen over another. In this work, summaries explaining the suicide risk level assigned using a fine-tuned mental-roberta-base model are generated from key phrases extracted using SHAP explainability using Mistral-7B. The training data for the classifier consists of all Reddit posts of a user in the University of Maryland Reddit Suicidality Dataset, Version 2, with their suicide risk labels along with selected features extracted from each post by the Linguistic Inquiry and Word Count (LIWC-22) tool. The resulting model is used to make predictions regarding risk on each post of the users in the evaluation set of the CLPsych 2024 shared task, with a SHAP explainer used to identify the phrases contributing to the top scoring, correct and severe risk categories. Some basic stoplisting is applied to the extracted phrases, along with length based filtering, and a locally run version of Mistral-7B-Instruct-v0.1 is used to create summaries from the highest value (based on SHAP) phrases.</abstract>
<identifier type="citekey">preiss-chen-2024-incorporating</identifier>
<location>
<url>https://aclanthology.org/2024.clpsych-1.19</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>211</start>
<end>217</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Incorporating Word Count Information into Depression Risk Summary Generation: INF@UoS CLPsych 2024 Submission
%A Preiss, Judita
%A Chen, Zenan
%Y Yates, Andrew
%Y Desmet, Bart
%Y Prud’hommeaux, Emily
%Y Zirikly, Ayah
%Y Bedrick, Steven
%Y MacAvaney, Sean
%Y Bar, Kfir
%Y Ireland, Molly
%Y Ophir, Yaakov
%S Proceedings of the 9th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2024)
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julians, Malta
%F preiss-chen-2024-incorporating
%X Large language model classifiers do not directly offer transparency: it is not clear why one class is chosen over another. In this work, summaries explaining the suicide risk level assigned using a fine-tuned mental-roberta-base model are generated from key phrases extracted using SHAP explainability using Mistral-7B. The training data for the classifier consists of all Reddit posts of a user in the University of Maryland Reddit Suicidality Dataset, Version 2, with their suicide risk labels along with selected features extracted from each post by the Linguistic Inquiry and Word Count (LIWC-22) tool. The resulting model is used to make predictions regarding risk on each post of the users in the evaluation set of the CLPsych 2024 shared task, with a SHAP explainer used to identify the phrases contributing to the top scoring, correct and severe risk categories. Some basic stoplisting is applied to the extracted phrases, along with length based filtering, and a locally run version of Mistral-7B-Instruct-v0.1 is used to create summaries from the highest value (based on SHAP) phrases.
%U https://aclanthology.org/2024.clpsych-1.19
%P 211-217
Markdown (Informal)
[Incorporating Word Count Information into Depression Risk Summary Generation: INF@UoS CLPsych 2024 Submission](https://aclanthology.org/2024.clpsych-1.19) (Preiss & Chen, CLPsych-WS 2024)
ACL