@inproceedings{deshpande-etal-2025-evaluating,
title = "Evaluating Large Language Models for Enhancing Live Chat Therapy: A Comparative Study with Psychotherapists",
author = {Deshpande, Neha Pravin and
Hillmann, Stefan and
M{\"o}ller, Sebastian},
editor = "B{\'e}chet, Fr{\'e}d{\'e}ric and
Lef{\`e}vre, Fabrice and
Asher, Nicholas and
Kim, Seokhwan and
Merlin, Teva",
booktitle = "Proceedings of the 26th Annual Meeting of the Special Interest Group on Discourse and Dialogue",
month = aug,
year = "2025",
address = "Avignon, France",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.sigdial-1.59/",
pages = "800--812",
abstract = "Large Language Models (LLMs) hold promise for addressing the shortage of qualified therapists in mental health care. While chatbot-based Cognitive Behavioral Therapy (CBT) tools exist, their efficacy in sensitive contexts remains underexplored. This study examines the potential of LLMs to support therapy sessions aimed at reducing Child Sexual Abuse Material (CSAM) consumption. We propose a Retrieval-Augmented Generation (RAG) framework that leverages a fine-tuned BERT-based retriever to guide LLM-generated responses, better capturing the multi-turn, context-specific dynamics of therapy. Four LLMs{---}Qwen2-7B-Instruct, Mistral-7B-Instruct-v0.3, Orca-2-13B, and Zephyr-7B-Alpha{---}were evaluated in a small-scale study with 14 domain-expert psychotherapists. Our comparative analysis reveals that, in certain scenarios, LLMs like Mistral-7B-Instruct-v0.3 and Orca-2-13B were preferred over human therapist responses. While limited by sample size, these findings suggest that LLMs can perform at a level comparable to or even exceeding that of human therapists, especially in therapy focused on reducing CSAM consumption. Our code is available online: https://git.tu-berlin.de/neha.deshpande/therapy{\_}responses/-/tree/main"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="deshpande-etal-2025-evaluating">
<titleInfo>
<title>Evaluating Large Language Models for Enhancing Live Chat Therapy: A Comparative Study with Psychotherapists</title>
</titleInfo>
<name type="personal">
<namePart type="given">Neha</namePart>
<namePart type="given">Pravin</namePart>
<namePart type="family">Deshpande</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefan</namePart>
<namePart type="family">Hillmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Möller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 26th Annual Meeting of the Special Interest Group on Discourse and Dialogue</title>
</titleInfo>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fabrice</namePart>
<namePart type="family">Lefèvre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicholas</namePart>
<namePart type="family">Asher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seokhwan</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Teva</namePart>
<namePart type="family">Merlin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Avignon, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large Language Models (LLMs) hold promise for addressing the shortage of qualified therapists in mental health care. While chatbot-based Cognitive Behavioral Therapy (CBT) tools exist, their efficacy in sensitive contexts remains underexplored. This study examines the potential of LLMs to support therapy sessions aimed at reducing Child Sexual Abuse Material (CSAM) consumption. We propose a Retrieval-Augmented Generation (RAG) framework that leverages a fine-tuned BERT-based retriever to guide LLM-generated responses, better capturing the multi-turn, context-specific dynamics of therapy. Four LLMs—Qwen2-7B-Instruct, Mistral-7B-Instruct-v0.3, Orca-2-13B, and Zephyr-7B-Alpha—were evaluated in a small-scale study with 14 domain-expert psychotherapists. Our comparative analysis reveals that, in certain scenarios, LLMs like Mistral-7B-Instruct-v0.3 and Orca-2-13B were preferred over human therapist responses. While limited by sample size, these findings suggest that LLMs can perform at a level comparable to or even exceeding that of human therapists, especially in therapy focused on reducing CSAM consumption. Our code is available online: https://git.tu-berlin.de/neha.deshpande/therapy_responses/-/tree/main</abstract>
<identifier type="citekey">deshpande-etal-2025-evaluating</identifier>
<location>
<url>https://aclanthology.org/2025.sigdial-1.59/</url>
</location>
<part>
<date>2025-08</date>
<extent unit="page">
<start>800</start>
<end>812</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Large Language Models for Enhancing Live Chat Therapy: A Comparative Study with Psychotherapists
%A Deshpande, Neha Pravin
%A Hillmann, Stefan
%A Möller, Sebastian
%Y Béchet, Frédéric
%Y Lefèvre, Fabrice
%Y Asher, Nicholas
%Y Kim, Seokhwan
%Y Merlin, Teva
%S Proceedings of the 26th Annual Meeting of the Special Interest Group on Discourse and Dialogue
%D 2025
%8 August
%I Association for Computational Linguistics
%C Avignon, France
%F deshpande-etal-2025-evaluating
%X Large Language Models (LLMs) hold promise for addressing the shortage of qualified therapists in mental health care. While chatbot-based Cognitive Behavioral Therapy (CBT) tools exist, their efficacy in sensitive contexts remains underexplored. This study examines the potential of LLMs to support therapy sessions aimed at reducing Child Sexual Abuse Material (CSAM) consumption. We propose a Retrieval-Augmented Generation (RAG) framework that leverages a fine-tuned BERT-based retriever to guide LLM-generated responses, better capturing the multi-turn, context-specific dynamics of therapy. Four LLMs—Qwen2-7B-Instruct, Mistral-7B-Instruct-v0.3, Orca-2-13B, and Zephyr-7B-Alpha—were evaluated in a small-scale study with 14 domain-expert psychotherapists. Our comparative analysis reveals that, in certain scenarios, LLMs like Mistral-7B-Instruct-v0.3 and Orca-2-13B were preferred over human therapist responses. While limited by sample size, these findings suggest that LLMs can perform at a level comparable to or even exceeding that of human therapists, especially in therapy focused on reducing CSAM consumption. Our code is available online: https://git.tu-berlin.de/neha.deshpande/therapy_responses/-/tree/main
%U https://aclanthology.org/2025.sigdial-1.59/
%P 800-812
Markdown (Informal)
[Evaluating Large Language Models for Enhancing Live Chat Therapy: A Comparative Study with Psychotherapists](https://aclanthology.org/2025.sigdial-1.59/) (Deshpande et al., SIGDIAL 2025)
ACL