@inproceedings{singh-etal-2025-systematic,
  title     = {Systematic Evaluation of Auto-Encoding and Large Language Model Representations for Capturing Author States and Traits},
  author    = {Singh, Khushboo and
               Varadarajan, Vasudha and
               V. Ganesan, Adithya and
               Nilsson, August H{\r{a}}kan and
               Soni, Nikita and
               Mahwish, Syeda and
               Chitale, Pranav and
               Boyd, Ryan L. and
               Ungar, Lyle and
               Rosenthal, Richard N. and
               Schwartz, H. Andrew},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2025},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-acl.971/},
  doi       = {10.18653/v1/2025.findings-acl.971},
  pages     = {18955--18973},
  isbn      = {979-8-89176-256-5},
  abstract  = {Large Language Models (LLMs) are increasingly used in human-centered applications, yet their ability to model diverse psychological constructs is not well understood. In this study, we systematically evaluate a range of Transformer-LMs to predict psychological variables across five major dimensions: affect, substance use, mental health, sociodemographics, and personality. Analyses span three temporal levels{---}short daily text responses about current affect, text aggregated over two-weeks, and user-level text collected over two years{---}allowing us to examine how each model{'}s strengths align with the underlying stability of different constructs. The findings show that mental health signals emerge as the most accurately predicted dimensions (r=0.6) across all temporal scales. At the daily scale, smaller models like DeBERTa and HaRT often performed better, whereas, at longer scales or with greater context, larger model like Llama3-8B performed the best. Also, aggregating text over the entire study period yielded stronger correlations for outcomes, such as age and income. Overall, these results suggest the importance of selecting appropriate model architectures and temporal aggregation techniques based on the stability and nature of the target variable.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="singh-etal-2025-systematic">
<titleInfo>
<title>Systematic Evaluation of Auto-Encoding and Large Language Model Representations for Capturing Author States and Traits</title>
</titleInfo>
<name type="personal">
<namePart type="given">Khushboo</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vasudha</namePart>
<namePart type="family">Varadarajan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adithya</namePart>
<namePart type="family">V. Ganesan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">August</namePart>
<namePart type="given">Håkan</namePart>
<namePart type="family">Nilsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikita</namePart>
<namePart type="family">Soni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Syeda</namePart>
<namePart type="family">Mahwish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pranav</namePart>
<namePart type="family">Chitale</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="given">L</namePart>
<namePart type="family">Boyd</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lyle</namePart>
<namePart type="family">Ungar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="given">N</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">H</namePart>
<namePart type="given">Andrew</namePart>
<namePart type="family">Schwartz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Large Language Models (LLMs) are increasingly used in human-centered applications, yet their ability to model diverse psychological constructs is not well understood. In this study, we systematically evaluate a range of Transformer-LMs to predict psychological variables across five major dimensions: affect, substance use, mental health, sociodemographics, and personality. Analyses span three temporal levels—short daily text responses about current affect, text aggregated over two-weeks, and user-level text collected over two years—allowing us to examine how each model’s strengths align with the underlying stability of different constructs. The findings show that mental health signals emerge as the most accurately predicted dimensions (r=0.6) across all temporal scales. At the daily scale, smaller models like DeBERTa and HaRT often performed better, whereas, at longer scales or with greater context, larger model like Llama3-8B performed the best. Also, aggregating text over the entire study period yielded stronger correlations for outcomes, such as age and income. Overall, these results suggest the importance of selecting appropriate model architectures and temporal aggregation techniques based on the stability and nature of the target variable.</abstract>
<identifier type="citekey">singh-etal-2025-systematic</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.971</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.971/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>18955</start>
<end>18973</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Systematic Evaluation of Auto-Encoding and Large Language Model Representations for Capturing Author States and Traits
%A Singh, Khushboo
%A Varadarajan, Vasudha
%A V. Ganesan, Adithya
%A Nilsson, August Håkan
%A Soni, Nikita
%A Mahwish, Syeda
%A Chitale, Pranav
%A Boyd, Ryan L.
%A Ungar, Lyle
%A Rosenthal, Richard N.
%A Schwartz, H. Andrew
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F singh-etal-2025-systematic
%X Large Language Models (LLMs) are increasingly used in human-centered applications, yet their ability to model diverse psychological constructs is not well understood. In this study, we systematically evaluate a range of Transformer-LMs to predict psychological variables across five major dimensions: affect, substance use, mental health, sociodemographics, and personality. Analyses span three temporal levels—short daily text responses about current affect, text aggregated over two-weeks, and user-level text collected over two years—allowing us to examine how each model’s strengths align with the underlying stability of different constructs. The findings show that mental health signals emerge as the most accurately predicted dimensions (r=0.6) across all temporal scales. At the daily scale, smaller models like DeBERTa and HaRT often performed better, whereas, at longer scales or with greater context, larger model like Llama3-8B performed the best. Also, aggregating text over the entire study period yielded stronger correlations for outcomes, such as age and income. Overall, these results suggest the importance of selecting appropriate model architectures and temporal aggregation techniques based on the stability and nature of the target variable.
%R 10.18653/v1/2025.findings-acl.971
%U https://aclanthology.org/2025.findings-acl.971/
%U https://doi.org/10.18653/v1/2025.findings-acl.971
%P 18955-18973
Markdown (Informal)
[Systematic Evaluation of Auto-Encoding and Large Language Model Representations for Capturing Author States and Traits](https://aclanthology.org/2025.findings-acl.971/) (Singh et al., Findings 2025)
ACL
- Khushboo Singh, Vasudha Varadarajan, Adithya V. Ganesan, August Håkan Nilsson, Nikita Soni, Syeda Mahwish, Pranav Chitale, Ryan L. Boyd, Lyle Ungar, Richard N. Rosenthal, and H. Andrew Schwartz. 2025. Systematic Evaluation of Auto-Encoding and Large Language Model Representations for Capturing Author States and Traits. In Findings of the Association for Computational Linguistics: ACL 2025, pages 18955–18973, Vienna, Austria. Association for Computational Linguistics.