@inproceedings{soubki-etal-2024-views,
title = "Views Are My Own, but Also Yours: Benchmarking Theory of Mind Using Common Ground",
author = "Soubki, Adil and
Murzaku, John and
Yousefi Jordehi, Arash and
Zeng, Peter and
Markowska, Magdalena and
Mirroshandel, Seyed Abolghasem and
Rambow, Owen",
editor = "Ku, Lun-Wei and
Martins, Andre and
Srikumar, Vivek",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2024",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-acl.880",
doi = "10.18653/v1/2024.findings-acl.880",
pages = "14815--14823",
abstract = "Evaluating the theory of mind (ToM) capabilities of language models (LMs) has recently received a great deal of attention. However, many existing benchmarks rely on synthetic data, which risks misaligning the resulting experiments with human behavior. We introduce the first ToM dataset based on naturally occurring spoken dialogs, Common-ToM, and show that LMs struggle to demonstrate ToM. We then show that integrating a simple, explicit representation of beliefs improves LM performance on Common-ToM.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="soubki-etal-2024-views">
<titleInfo>
<title>Views Are My Own, but Also Yours: Benchmarking Theory of Mind Using Common Ground</title>
</titleInfo>
<name type="personal">
<namePart type="given">Adil</namePart>
<namePart type="family">Soubki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Murzaku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arash</namePart>
<namePart type="family">Yousefi Jordehi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Zeng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Magdalena</namePart>
<namePart type="family">Markowska</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seyed</namePart>
<namePart type="given">Abolghasem</namePart>
<namePart type="family">Mirroshandel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lun-Wei</namePart>
<namePart type="family">Ku</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andre</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivek</namePart>
<namePart type="family">Srikumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Evaluating the theory of mind (ToM) capabilities of language models (LMs) has recently received a great deal of attention. However, many existing benchmarks rely on synthetic data, which risks misaligning the resulting experiments with human behavior. We introduce the first ToM dataset based on naturally occurring spoken dialogs, Common-ToM, and show that LMs struggle to demonstrate ToM. We then show that integrating a simple, explicit representation of beliefs improves LM performance on Common-ToM.</abstract>
<identifier type="citekey">soubki-etal-2024-views</identifier>
<identifier type="doi">10.18653/v1/2024.findings-acl.880</identifier>
<location>
<url>https://aclanthology.org/2024.findings-acl.880</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>14815</start>
<end>14823</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Views Are My Own, but Also Yours: Benchmarking Theory of Mind Using Common Ground
%A Soubki, Adil
%A Murzaku, John
%A Yousefi Jordehi, Arash
%A Zeng, Peter
%A Markowska, Magdalena
%A Mirroshandel, Seyed Abolghasem
%A Rambow, Owen
%Y Ku, Lun-Wei
%Y Martins, Andre
%Y Srikumar, Vivek
%S Findings of the Association for Computational Linguistics: ACL 2024
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F soubki-etal-2024-views
%X Evaluating the theory of mind (ToM) capabilities of language models (LMs) has recently received a great deal of attention. However, many existing benchmarks rely on synthetic data, which risks misaligning the resulting experiments with human behavior. We introduce the first ToM dataset based on naturally occurring spoken dialogs, Common-ToM, and show that LMs struggle to demonstrate ToM. We then show that integrating a simple, explicit representation of beliefs improves LM performance on Common-ToM.
%R 10.18653/v1/2024.findings-acl.880
%U https://aclanthology.org/2024.findings-acl.880
%U https://doi.org/10.18653/v1/2024.findings-acl.880
%P 14815-14823
Markdown (Informal)
[Views Are My Own, but Also Yours: Benchmarking Theory of Mind Using Common Ground](https://aclanthology.org/2024.findings-acl.880) (Soubki et al., Findings 2024)
ACL
- Adil Soubki, John Murzaku, Arash Yousefi Jordehi, Peter Zeng, Magdalena Markowska, Seyed Abolghasem Mirroshandel, and Owen Rambow. 2024. Views Are My Own, but Also Yours: Benchmarking Theory of Mind Using Common Ground. In Findings of the Association for Computational Linguistics: ACL 2024, pages 14815–14823, Bangkok, Thailand. Association for Computational Linguistics.