% ACL Anthology BibTeX record for paper 2026.clpsych-1.6 (pages 76--87 of the CLPsych 2026 proceedings).
% Braces inside field values protect casing (LLM, CLPsych) and mark the accented letter in the last author name.
@inproceedings{puspo-etal-2026-role,
  title     = {On the Role of Context in {LLM} Alignment to Mental Health Counseling Competencies},
  author    = {Puspo, Sadiya Sayara Chowdhury and
               Zampieri, Marcos and
               Uzuner, {\"O}zlem},
  editor    = {Zirikly, Aya and
               Bar, Kfir and
               MacAvaney, Sean and
               Ireland, Molly and
               Ophir, Yaakov and
               Atzil-Slonim, Dana and
               Varadarajan, Vasudha and
               Bedrick, Steven and
               Desmet, Bart},
  booktitle = {Proceedings of the 10th Workshop on Computational Linguistics and Clinical Psychology ({CLPsych} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.clpsych-1.6/},
  pages     = {76--87},
  isbn      = {979-8-89176-421-7},
  abstract  = {As Large Language Models (LLMs) demonstrate strong performance on clinical benchmarks, it remains unclear whether this reflects true patient-specific reasoning or reliance on generalized symptom patterns. To address this gap, we evaluate LLMs on a counseling competency benchmark to assess their use of patient-specific contextual information. Through controlled experiments with ablation experiments, role framing, Thread-of-Thought (ThoT) prompting, and input perturbations, we find that removing contextual details results in only modest performance drops, and predictions remain stable under input variations, indicating limited sensitivity to context. Although structured prompting increases explicit mention of patient details, it does not improve answer accuracy. Error analysis reveals systematic patterns where models favor general clinical associations over context-specific cues, even when such cues are correctly identified during intermediate reasoning. Our findings suggest that achieving passing-level performance does not guarantee context-sensitive decision-making revealing an important gap between apparent clinical competence and actual contextual reasoning. This indicates the need for evaluation frameworks that directly test context integration in mental health applications.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- One MODS record; its ID matches the citekey identifier given further down. -->
  <mods ID="puspo-etal-2026-role">
    <titleInfo>
      <title>On the Role of Context in LLM Alignment to Mental Health Counseling Competencies</title>
    </titleInfo>
    <!-- Paper authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Sadiya</namePart>
      <namePart type="given">Sayara</namePart>
      <namePart type="given">Chowdhury</namePart>
      <namePart type="family">Puspo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marcos</namePart>
      <namePart type="family">Zampieri</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Özlem</namePart>
      <namePart type="family">Uzuner</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 10th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Aya</namePart>
        <namePart type="family">Zirikly</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kfir</namePart>
        <namePart type="family">Bar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sean</namePart>
        <namePart type="family">MacAvaney</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Molly</namePart>
        <namePart type="family">Ireland</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yaakov</namePart>
        <namePart type="family">Ophir</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Dana</namePart>
        <namePart type="family">Atzil-Slonim</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Vasudha</namePart>
        <namePart type="family">Varadarajan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Steven</namePart>
        <namePart type="family">Bedrick</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bart</namePart>
        <namePart type="family">Desmet</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-421-7</identifier>
    </relatedItem>
    <abstract>As Large Language Models (LLMs) demonstrate strong performance on clinical benchmarks, it remains unclear whether this reflects true patient-specific reasoning or reliance on generalized symptom patterns. To address this gap, we evaluate LLMs on a counseling competency benchmark to assess their use of patient-specific contextual information. Through controlled experiments with ablation experiments, role framing, Thread-of-Thought (ThoT) prompting, and input perturbations, we find that removing contextual details results in only modest performance drops, and predictions remain stable under input variations, indicating limited sensitivity to context. Although structured prompting increases explicit mention of patient details, it does not improve answer accuracy. Error analysis reveals systematic patterns where models favor general clinical associations over context-specific cues, even when such cues are correctly identified during intermediate reasoning. Our findings suggest that achieving passing-level performance does not guarantee context-sensitive decision-making revealing an important gap between apparent clinical competence and actual contextual reasoning. This indicates the need for evaluation frameworks that directly test context integration in mental health applications.</abstract>
    <identifier type="citekey">puspo-etal-2026-role</identifier>
    <location>
      <url>https://aclanthology.org/2026.clpsych-1.6/</url>
    </location>
    <!-- Position of the paper within the host volume: issue date and page range. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>76</start>
        <end>87</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T On the Role of Context in LLM Alignment to Mental Health Counseling Competencies
%A Puspo, Sadiya Sayara Chowdhury
%A Zampieri, Marcos
%A Uzuner, Özlem
%Y Zirikly, Aya
%Y Bar, Kfir
%Y MacAvaney, Sean
%Y Ireland, Molly
%Y Ophir, Yaakov
%Y Atzil-Slonim, Dana
%Y Varadarajan, Vasudha
%Y Bedrick, Steven
%Y Desmet, Bart
%S Proceedings of the 10th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-421-7
%F puspo-etal-2026-role
%X As Large Language Models (LLMs) demonstrate strong performance on clinical benchmarks, it remains unclear whether this reflects true patient-specific reasoning or reliance on generalized symptom patterns. To address this gap, we evaluate LLMs on a counseling competency benchmark to assess their use of patient-specific contextual information. Through controlled experiments with ablation experiments, role framing, Thread-of-Thought (ThoT) prompting, and input perturbations, we find that removing contextual details results in only modest performance drops, and predictions remain stable under input variations, indicating limited sensitivity to context. Although structured prompting increases explicit mention of patient details, it does not improve answer accuracy. Error analysis reveals systematic patterns where models favor general clinical associations over context-specific cues, even when such cues are correctly identified during intermediate reasoning. Our findings suggest that achieving passing-level performance does not guarantee context-sensitive decision-making revealing an important gap between apparent clinical competence and actual contextual reasoning. This indicates the need for evaluation frameworks that directly test context integration in mental health applications.
%U https://aclanthology.org/2026.clpsych-1.6/
%P 76-87
Markdown (Informal)
[On the Role of Context in LLM Alignment to Mental Health Counseling Competencies](https://aclanthology.org/2026.clpsych-1.6/) (Puspo et al., CLPsych 2026)
ACL