@inproceedings{kermani-etal-2025-systematic,
title = "A Systematic Evaluation of {LLM} Strategies for Mental Health Text Analysis: Fine-tuning vs. Prompt Engineering vs. {RAG}",
author = "Kermani, Arshia and
Perez-Rosas, Veronica and
Metsis, Vangelis",
editor = "Zirikly, Ayah and
Yates, Andrew and
Desmet, Bart and
Ireland, Molly and
Bedrick, Steven and
MacAvaney, Sean and
Bar, Kfir and
Ophir, Yaakov",
booktitle = "Proceedings of the 10th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2025)",
month = may,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.clpsych-1.14/",
doi = "10.18653/v1/2025.clpsych-1.14",
pages = "172--180",
ISBN = "979-8-89176-226-8",
abstract = "This study presents a systematic comparison of three approaches for the analysis of mental health text using large language models (LLMs): prompt engineering, retrieval augmented generation (RAG), and fine-tuning. Using LLaMA 3, we evaluate these approaches on emotion classification and mental health condition detection tasks across two datasets. Fine-tuning achieves the highest accuracy (91{\%} for emotion classification, 80{\%} for mental health conditions) but requires substantial computational resources and large training sets, while prompt engineering and RAG offer more flexible deployment with moderate performance (40-68{\%} accuracy). Our findings provide practical insights for implementing LLM-based solutions in mental health applications, highlighting the trade-offs between accuracy, computational requirements, and deployment flexibility."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kermani-etal-2025-systematic">
<titleInfo>
<title>A Systematic Evaluation of LLM Strategies for Mental Health Text Analysis: Fine-tuning vs. Prompt Engineering vs. RAG</title>
</titleInfo>
<name type="personal">
<namePart type="given">Arshia</namePart>
<namePart type="family">Kermani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronica</namePart>
<namePart type="family">Perez-Rosas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vangelis</namePart>
<namePart type="family">Metsis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ayah</namePart>
<namePart type="family">Zirikly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Yates</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bart</namePart>
<namePart type="family">Desmet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Molly</namePart>
<namePart type="family">Ireland</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bedrick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sean</namePart>
<namePart type="family">MacAvaney</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kfir</namePart>
<namePart type="family">Bar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yaakov</namePart>
<namePart type="family">Ophir</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-226-8</identifier>
</relatedItem>
<abstract>This study presents a systematic comparison of three approaches for the analysis of mental health text using large language models (LLMs): prompt engineering, retrieval augmented generation (RAG), and fine-tuning. Using LLaMA 3, we evaluate these approaches on emotion classification and mental health condition detection tasks across two datasets. Fine-tuning achieves the highest accuracy (91% for emotion classification, 80% for mental health conditions) but requires substantial computational resources and large training sets, while prompt engineering and RAG offer more flexible deployment with moderate performance (40-68% accuracy). Our findings provide practical insights for implementing LLM-based solutions in mental health applications, highlighting the trade-offs between accuracy, computational requirements, and deployment flexibility.</abstract>
<identifier type="citekey">kermani-etal-2025-systematic</identifier>
<identifier type="doi">10.18653/v1/2025.clpsych-1.14</identifier>
<location>
<url>https://aclanthology.org/2025.clpsych-1.14/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>172</start>
<end>180</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Systematic Evaluation of LLM Strategies for Mental Health Text Analysis: Fine-tuning vs. Prompt Engineering vs. RAG
%A Kermani, Arshia
%A Perez-Rosas, Veronica
%A Metsis, Vangelis
%Y Zirikly, Ayah
%Y Yates, Andrew
%Y Desmet, Bart
%Y Ireland, Molly
%Y Bedrick, Steven
%Y MacAvaney, Sean
%Y Bar, Kfir
%Y Ophir, Yaakov
%S Proceedings of the 10th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2025)
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-226-8
%F kermani-etal-2025-systematic
%X This study presents a systematic comparison of three approaches for the analysis of mental health text using large language models (LLMs): prompt engineering, retrieval augmented generation (RAG), and fine-tuning. Using LLaMA 3, we evaluate these approaches on emotion classification and mental health condition detection tasks across two datasets. Fine-tuning achieves the highest accuracy (91% for emotion classification, 80% for mental health conditions) but requires substantial computational resources and large training sets, while prompt engineering and RAG offer more flexible deployment with moderate performance (40-68% accuracy). Our findings provide practical insights for implementing LLM-based solutions in mental health applications, highlighting the trade-offs between accuracy, computational requirements, and deployment flexibility.
%R 10.18653/v1/2025.clpsych-1.14
%U https://aclanthology.org/2025.clpsych-1.14/
%U https://doi.org/10.18653/v1/2025.clpsych-1.14
%P 172-180
Markdown (Informal)
[A Systematic Evaluation of LLM Strategies for Mental Health Text Analysis: Fine-tuning vs. Prompt Engineering vs. RAG](https://aclanthology.org/2025.clpsych-1.14/) (Kermani et al., CLPsych 2025)
ACL