@inproceedings{sicilia-alikhani-2025-evaluating,
title = "Evaluating Theory of (an uncertain) Mind: Predicting the Uncertain Beliefs of Others from Conversational Cues",
author = "Sicilia, Anthony and
Alikhani, Malihe",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.395/",
doi = "10.18653/v1/2025.acl-long.395",
pages = "8007--8021",
ISBN = "979-8-89176-251-0",
abstract = "Typically, when evaluating Theory of Mind, we consider the beliefs of others to be binary: held or not held. But what if someone is unsure about their own beliefs? How can we quantify this uncertainty? We propose a new suite of tasks, challenging language models (LMs) to model the uncertainty of participants in a dialogue. We design these tasks around conversation forecasting, where the goal is to predict the probability of an unobserved conversation outcome. Uniquely, we view conversation agents themselves as forecasters, asking an LM to predict the uncertainty of an individual from their language use. We experiment with scaling methods, bagging, and demographic context for this regression task, conducting experiments on three dialogue corpora (social, negotiation, task-oriented) with eight LMs. While LMs can explain up to 7{\%} variance in the uncertainty of others, we highlight the difficulty of the tasks and room for future work, especially in tasks that require explicit shifts in perspective."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="sicilia-alikhani-2025-evaluating">
    <titleInfo>
      <title>Evaluating Theory of (an uncertain) Mind: Predicting the Uncertain Beliefs of Others from Conversational Cues</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Anthony</namePart>
      <namePart type="family">Sicilia</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Malihe</namePart>
      <namePart type="family">Alikhani</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Wanxiang</namePart>
        <namePart type="family">Che</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joyce</namePart>
        <namePart type="family">Nabende</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Shutova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohammad</namePart>
        <namePart type="given">Taher</namePart>
        <namePart type="family">Pilehvar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-251-0</identifier>
    </relatedItem>
    <abstract>Typically, when evaluating Theory of Mind, we consider the beliefs of others to be binary: held or not held. But what if someone is unsure about their own beliefs? How can we quantify this uncertainty? We propose a new suite of tasks, challenging language models (LMs) to model the uncertainty of participants in a dialogue. We design these tasks around conversation forecasting, where the goal is to predict the probability of an unobserved conversation outcome. Uniquely, we view conversation agents themselves as forecasters, asking an LM to predict the uncertainty of an individual from their language use. We experiment with scaling methods, bagging, and demographic context for this regression task, conducting experiments on three dialogue corpora (social, negotiation, task-oriented) with eight LMs. While LMs can explain up to 7% variance in the uncertainty of others, we highlight the difficulty of the tasks and room for future work, especially in tasks that require explicit shifts in perspective.</abstract>
    <identifier type="citekey">sicilia-alikhani-2025-evaluating</identifier>
    <identifier type="doi">10.18653/v1/2025.acl-long.395</identifier>
    <location>
      <url>https://aclanthology.org/2025.acl-long.395/</url>
    </location>
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>8007</start>
        <end>8021</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Theory of (an uncertain) Mind: Predicting the Uncertain Beliefs of Others from Conversational Cues
%A Sicilia, Anthony
%A Alikhani, Malihe
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F sicilia-alikhani-2025-evaluating
%X Typically, when evaluating Theory of Mind, we consider the beliefs of others to be binary: held or not held. But what if someone is unsure about their own beliefs? How can we quantify this uncertainty? We propose a new suite of tasks, challenging language models (LMs) to model the uncertainty of participants in a dialogue. We design these tasks around conversation forecasting, where the goal is to predict the probability of an unobserved conversation outcome. Uniquely, we view conversation agents themselves as forecasters, asking an LM to predict the uncertainty of an individual from their language use. We experiment with scaling methods, bagging, and demographic context for this regression task, conducting experiments on three dialogue corpora (social, negotiation, task-oriented) with eight LMs. While LMs can explain up to 7% variance in the uncertainty of others, we highlight the difficulty of the tasks and room for future work, especially in tasks that require explicit shifts in perspective.
%R 10.18653/v1/2025.acl-long.395
%U https://aclanthology.org/2025.acl-long.395/
%U https://doi.org/10.18653/v1/2025.acl-long.395
%P 8007-8021
Markdown (Informal)
[Evaluating Theory of (an uncertain) Mind: Predicting the Uncertain Beliefs of Others from Conversational Cues](https://aclanthology.org/2025.acl-long.395/) (Sicilia & Alikhani, ACL 2025)
ACL
Anthony Sicilia and Malihe Alikhani. 2025. [Evaluating Theory of (an uncertain) Mind: Predicting the Uncertain Beliefs of Others from Conversational Cues](https://aclanthology.org/2025.acl-long.395/). In *Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)*, pages 8007–8021, Vienna, Austria. Association for Computational Linguistics.