% BibTeX record for a workshop paper in the CLPsych 2026 proceedings.
% Names use the unambiguous "Last, First" form so multi-word surnames
% (e.g. "Martin Lopez", "Wiltsey Stirman") are parsed as a single family name.
@inproceedings{campione-etal-2026-language,
    author    = {Campione, Samuel and
                 Stade, Elizabeth and
                 Losavio, Stefanie and
                 Singhvi, Shreya and
                 Xuan, William and
                 Bui, Tony and
                 Martin Lopez, Maria and
                 Subrahmanya, Shashanka and
                 Schuhmann, Bailee and
                 Worley, Courtney and
                 Wiltsey Stirman, Shannon and
                 Eichstaedt, Johannes and
                 Schwartz, H. Andrew},
    title     = {Language-Based Detection of Adherence to Evidence-Based Psychotherapy Scripts},
    editor    = {Zirikly, Aya and
                 Bar, Kfir and
                 MacAvaney, Sean and
                 Ireland, Molly and
                 Ophir, Yaakov and
                 Atzil-Slonim, Dana and
                 Varadarajan, Vasudha and
                 Bedrick, Steven and
                 Desmet, Bart},
    booktitle = {Proceedings of the 10th Workshop on Computational Linguistics and Clinical Psychology ({CLPsych} 2026)},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.clpsych-1.20/},
    pages     = {250--257},
    isbn      = {979-8-89176-421-7},
    abstract  = {Some psychotherapies, such as written exposure therapy for posttraumatic stress disorder, utilize ``scripts'' during parts of treatment, but verifying script adherence to ensure engagement of key mechanisms of change is a time-consuming step for therapy supervisors. Here, we formalize therapy script adherence as an NLP task, and evaluate several simple (text similarity) and more complex (few-shot LLM) approaches. Over 351 annotated therapist utterance-script pairs, we find text similarity approaches to be highly competitive with LLMs and produce fewer false positives. ROUGE-L recall achieves F1 = 0.973, and BLEU achieves F1 = 0.972 with full precision and zero false positives. GPT-5.2 achieves F1 = 0.935 and GPT-4o-mini achieves F1 = 0.876. Given that the text similarity techniques are multiple orders of magnitude less complex, our results underscore the ability for simpler NLP techniques to still be effective in the age of LLMs for tasks that are more textual in nature, suggesting that aspects of therapist fidelity to evidence-based treatments can be assessed without using cloud API calls.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record for the paper; its ID matches the citekey identifier below. -->
  <mods ID="campione-etal-2026-language">
    <titleInfo>
      <title>Language-Based Detection of Adherence to Evidence-Based Psychotherapy Scripts</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Samuel</namePart>
      <namePart type="family">Campione</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Elizabeth</namePart>
      <namePart type="family">Stade</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Stefanie</namePart>
      <namePart type="family">Losavio</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shreya</namePart>
      <namePart type="family">Singhvi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">William</namePart>
      <namePart type="family">Xuan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tony</namePart>
      <namePart type="family">Bui</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Maria</namePart>
      <namePart type="family">Martin Lopez</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shashanka</namePart>
      <namePart type="family">Subrahmanya</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bailee</namePart>
      <namePart type="family">Schuhmann</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Courtney</namePart>
      <namePart type="family">Worley</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shannon</namePart>
      <namePart type="family">Wiltsey Stirman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Johannes</namePart>
      <namePart type="family">Eichstaedt</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <!-- Two given-name parts: an initial followed by the second given name. -->
      <namePart type="given">H</namePart>
      <namePart type="given">Andrew</namePart>
      <namePart type="family">Schwartz</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 10th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Aya</namePart>
        <namePart type="family">Zirikly</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kfir</namePart>
        <namePart type="family">Bar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sean</namePart>
        <namePart type="family">MacAvaney</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Molly</namePart>
        <namePart type="family">Ireland</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yaakov</namePart>
        <namePart type="family">Ophir</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Dana</namePart>
        <namePart type="family">Atzil-Slonim</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Vasudha</namePart>
        <namePart type="family">Varadarajan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Steven</namePart>
        <namePart type="family">Bedrick</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bart</namePart>
        <namePart type="family">Desmet</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-421-7</identifier>
    </relatedItem>
    <abstract>Some psychotherapies, such as written exposure therapy for posttraumatic stress disorder, utilize “scripts” during parts of treatment, but verifying script adherence to ensure engagement of key mechanisms of change is a time-consuming step for therapy supervisors. Here, we formalize therapy script adherence as an NLP task, and evaluate several simple (text similarity) and more complex (few-shot LLM) approaches. Over 351 annotated therapist utterance-script pairs, we find text similarity approaches to be highly competitive with LLMs and produce fewer false positives. ROUGE-L recall achieves F1 = 0.973, and BLEU achieves F1 = 0.972 with full precision and zero false positives. GPT-5.2 achieves F1 = 0.935 and GPT-4o-mini achieves F1 = 0.876. Given that the text similarity techniques are multiple orders of magnitude less complex, our results underscore the ability for simpler NLP techniques to still be effective in the age of LLMs for tasks that are more textual in nature, suggesting that aspects of therapist fidelity to evidence-based treatments can be assessed without using cloud API calls.</abstract>
    <identifier type="citekey">campione-etal-2026-language</identifier>
    <location>
      <url>https://aclanthology.org/2026.clpsych-1.20/</url>
    </location>
    <!-- Position of the paper within the host volume: issue date and page range. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>250</start>
        <end>257</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Language-Based Detection of Adherence to Evidence-Based Psychotherapy Scripts
%A Campione, Samuel
%A Stade, Elizabeth
%A Losavio, Stefanie
%A Singhvi, Shreya
%A Xuan, William
%A Bui, Tony
%A Martin Lopez, Maria
%A Subrahmanya, Shashanka
%A Schuhmann, Bailee
%A Worley, Courtney
%A Wiltsey Stirman, Shannon
%A Eichstaedt, Johannes
%A Schwartz, H. Andrew
%Y Zirikly, Aya
%Y Bar, Kfir
%Y MacAvaney, Sean
%Y Ireland, Molly
%Y Ophir, Yaakov
%Y Atzil-Slonim, Dana
%Y Varadarajan, Vasudha
%Y Bedrick, Steven
%Y Desmet, Bart
%S Proceedings of the 10th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-421-7
%F campione-etal-2026-language
%X Some psychotherapies, such as written exposure therapy for posttraumatic stress disorder, utilize “scripts” during parts of treatment, but verifying script adherence to ensure engagement of key mechanisms of change is a time-consuming step for therapy supervisors. Here, we formalize therapy script adherence as an NLP task, and evaluate several simple (text similarity) and more complex (few-shot LLM) approaches. Over 351 annotated therapist utterance-script pairs, we find text similarity approaches to be highly competitive with LLMs and produce fewer false positives. ROUGE-L recall achieves F1 = 0.973, and BLEU achieves F1 = 0.972 with full precision and zero false positives. GPT-5.2 achieves F1 = 0.935 and GPT-4o-mini achieves F1 = 0.876. Given that the text similarity techniques are multiple orders of magnitude less complex, our results underscore the ability for simpler NLP techniques to still be effective in the age of LLMs for tasks that are more textual in nature, suggesting that aspects of therapist fidelity to evidence-based treatments can be assessed without using cloud API calls.
%U https://aclanthology.org/2026.clpsych-1.20/
%P 250-257
Markdown (Informal)
[Language-Based Detection of Adherence to Evidence-Based Psychotherapy Scripts](https://aclanthology.org/2026.clpsych-1.20/) (Campione et al., CLPsych 2026)
ACL
- Samuel Campione, Elizabeth Stade, Stefanie Losavio, Shreya Singhvi, William Xuan, Tony Bui, Maria Martin Lopez, Shashanka Subrahmanya, Bailee Schuhmann, Courtney Worley, Shannon Wiltsey Stirman, Johannes Eichstaedt, and H. Andrew Schwartz. 2026. Language-Based Detection of Adherence to Evidence-Based Psychotherapy Scripts. In Proceedings of the 10th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2026), pages 250–257, San Diego, California, USA. Association for Computational Linguistics.