@inproceedings{huang-etal-2024-notion,
title = "A Notion of Complexity for Theory of Mind via Discrete World Models",
author = "Huang, X. and
La Malfa, Emanuele and
Marro, Samuele and
Asperti, Andrea and
Cohn, Anthony and
Wooldridge, Michael",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.167",
pages = "2964--2983",
abstract = "Theory of Mind (ToM) can be used to assess the capabilities of Large Language Models (LLMs) in complex scenarios where social reasoning is required. While the research community has proposed many ToM benchmarks, their hardness varies greatly, and their complexity is not well defined. This work proposes a framework inspired by cognitive load theory to measure the complexity of ToM tasks. We quantify a problem{'}s complexity as the number of states necessary to solve it correctly. Our complexity measure also accounts for spurious states of a ToM problem designed to make it apparently harder. We use our method to assess the complexity of five widely adopted ToM benchmarks. On top of this framework, we design a prompting technique that augments the information available to a model with a description of how the environment changes with the agents{'} interactions. We name this technique Discrete World Models (DWM) and show how it elicits superior performance on ToM tasks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="huang-etal-2024-notion">
<titleInfo>
<title>A Notion of Complexity for Theory of Mind via Discrete World Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">X</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emanuele</namePart>
<namePart type="family">La Malfa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samuele</namePart>
<namePart type="family">Marro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrea</namePart>
<namePart type="family">Asperti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anthony</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Wooldridge</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Theory of Mind (ToM) can be used to assess the capabilities of Large Language Models (LLMs) in complex scenarios where social reasoning is required. While the research community has proposed many ToM benchmarks, their hardness varies greatly, and their complexity is not well defined. This work proposes a framework inspired by cognitive load theory to measure the complexity of ToM tasks. We quantify a problem’s complexity as the number of states necessary to solve it correctly. Our complexity measure also accounts for spurious states of a ToM problem designed to make it apparently harder. We use our method to assess the complexity of five widely adopted ToM benchmarks. On top of this framework, we design a prompting technique that augments the information available to a model with a description of how the environment changes with the agents’ interactions. We name this technique Discrete World Models (DWM) and show how it elicits superior performance on ToM tasks.</abstract>
<identifier type="citekey">huang-etal-2024-notion</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.167</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>2964</start>
<end>2983</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Notion of Complexity for Theory of Mind via Discrete World Models
%A Huang, X.
%A La Malfa, Emanuele
%A Marro, Samuele
%A Asperti, Andrea
%A Cohn, Anthony
%A Wooldridge, Michael
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F huang-etal-2024-notion
%X Theory of Mind (ToM) can be used to assess the capabilities of Large Language Models (LLMs) in complex scenarios where social reasoning is required. While the research community has proposed many ToM benchmarks, their hardness varies greatly, and their complexity is not well defined. This work proposes a framework inspired by cognitive load theory to measure the complexity of ToM tasks. We quantify a problem’s complexity as the number of states necessary to solve it correctly. Our complexity measure also accounts for spurious states of a ToM problem designed to make it apparently harder. We use our method to assess the complexity of five widely adopted ToM benchmarks. On top of this framework, we design a prompting technique that augments the information available to a model with a description of how the environment changes with the agents’ interactions. We name this technique Discrete World Models (DWM) and show how it elicits superior performance on ToM tasks.
%U https://aclanthology.org/2024.findings-emnlp.167
%P 2964-2983
Markdown (Informal)
[A Notion of Complexity for Theory of Mind via Discrete World Models](https://aclanthology.org/2024.findings-emnlp.167) (Huang et al., Findings 2024)
ACL