@inproceedings{marker-etal-2026-evaluating,
title = "Evaluating Document-Tuned Transformer Representations for Person-level Mental Health Assessment",
author = "Marker, Aaron and
Kjell, Oscar and
Varadarajan, Vasudha and
Schwartz, H. Andrew",
editor = "Zirikly, Aya and
Bar, Kfir and
MacAvaney, Sean and
Ireland, Molly and
Ophir, Yaakov and
Atzil-Slonim, Dana and
Varadarajan, Vasudha and
Bedrick, Steven and
Desmet, Bart",
booktitle = "Proceedings of the 10th Workshop on Computational Linguistics and Clinical Psychology ({CLP}sych 2026)",
month = jul,
year = "2026",
address = "San Diego, California, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.clpsych-1.14/",
pages = "178--187",
ISBN = "979-8-89176-421-7",
abstract = "Person-level psychological assessment requires aggregating meaning across many messages from the same individual, a task that document-level training objectives were not explicitly designed for. We present a systematic, empirical comparison between architecturally matched traditional (a) base-transformers and (b) document-tuned-transformers (further contrastively fine-tuned at the document-level, sometimes referred to as ``sentence transformers'') under otherwise identical conditions. Comparing layer-wise and overall performance across two longitudinal mental health and psychological datasets, we find document-tuned models demonstrated a consistent improvement over base representations (increase in Pearson r of 13.4{\%}, p=.015). Robustness analyses revealed document-tuned models remained more accurate under perturbations to word deletion, synonym replacement, typo injection, and back translation. Further, hedged language (e.g., `usually') was more characteristic of outcomes in document-tuned embeddings while abundance (e.g., `lot') was more characteristic of base-transformers, suggesting document-tuned models may better capture uncertainty. These results suggest representation choice impacts mental health prediction, document-tuned models often being more adept."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="marker-etal-2026-evaluating">
<titleInfo>
<title>Evaluating Document-Tuned Transformer Representations for Person-level Mental Health Assessment</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aaron</namePart>
<namePart type="family">Marker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oscar</namePart>
<namePart type="family">Kjell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vasudha</namePart>
<namePart type="family">Varadarajan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">H</namePart>
<namePart type="given">Andrew</namePart>
<namePart type="family">Schwartz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aya</namePart>
<namePart type="family">Zirikly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kfir</namePart>
<namePart type="family">Bar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sean</namePart>
<namePart type="family">MacAvaney</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Molly</namePart>
<namePart type="family">Ireland</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yaakov</namePart>
<namePart type="family">Ophir</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dana</namePart>
<namePart type="family">Atzil-Slonim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vasudha</namePart>
<namePart type="family">Varadarajan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bedrick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bart</namePart>
<namePart type="family">Desmet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-421-7</identifier>
</relatedItem>
<abstract>Person-level psychological assessment requires aggregating meaning across many messages from the same individual, a task that document-level training objectives were not explicitly designed for. We present a systematic, empirical comparison between architecturally matched traditional (a) base-transformers and (b) document-tuned-transformers (further contrastively fine-tuned at the document-level, sometimes referred to as “sentence transformers”) under otherwise identical conditions. Comparing layer-wise and overall performance across two longitudinal mental health and psychological datasets, we find document-tuned models demonstrated a consistent improvement over base representations (increase in Pearson r of 13.4%, p=.015). Robustness analyses revealed document-tuned models remained more accurate under perturbations to word deletion, synonym replacement, typo injection, and back translation. Further, hedged language (e.g., ‘usually’) was more characteristic of outcomes in document-tuned embeddings while abundance (e.g., ‘lot’) was more characteristic of base-transformers, suggesting document-tuned models may better capture uncertainty. These results suggest representation choice impacts mental health prediction, document-tuned models often being more adept.</abstract>
<identifier type="citekey">marker-etal-2026-evaluating</identifier>
<location>
<url>https://aclanthology.org/2026.clpsych-1.14/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>178</start>
<end>187</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Document-Tuned Transformer Representations for Person-level Mental Health Assessment
%A Marker, Aaron
%A Kjell, Oscar
%A Varadarajan, Vasudha
%A Schwartz, H. Andrew
%Y Zirikly, Aya
%Y Bar, Kfir
%Y MacAvaney, Sean
%Y Ireland, Molly
%Y Ophir, Yaakov
%Y Atzil-Slonim, Dana
%Y Varadarajan, Vasudha
%Y Bedrick, Steven
%Y Desmet, Bart
%S Proceedings of the 10th Workshop on Computational Linguistics and Clinical Psychology (CLPsych 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, USA
%@ 979-8-89176-421-7
%F marker-etal-2026-evaluating
%X Person-level psychological assessment requires aggregating meaning across many messages from the same individual, a task that document-level training objectives were not explicitly designed for. We present a systematic, empirical comparison between architecturally matched traditional (a) base-transformers and (b) document-tuned-transformers (further contrastively fine-tuned at the document-level, sometimes referred to as “sentence transformers”) under otherwise identical conditions. Comparing layer-wise and overall performance across two longitudinal mental health and psychological datasets, we find document-tuned models demonstrated a consistent improvement over base representations (increase in Pearson r of 13.4%, p=.015). Robustness analyses revealed document-tuned models remained more accurate under perturbations to word deletion, synonym replacement, typo injection, and back translation. Further, hedged language (e.g., ‘usually’) was more characteristic of outcomes in document-tuned embeddings while abundance (e.g., ‘lot’) was more characteristic of base-transformers, suggesting document-tuned models may better capture uncertainty. These results suggest representation choice impacts mental health prediction, document-tuned models often being more adept.
%U https://aclanthology.org/2026.clpsych-1.14/
%P 178-187
Markdown (Informal)
[Evaluating Document-Tuned Transformer Representations for Person-level Mental Health Assessment](https://aclanthology.org/2026.clpsych-1.14/) (Marker et al., CLPsych 2026)
ACL