@inproceedings{nematzadeh-etal-2018-evaluating,
title = "Evaluating Theory of Mind in Question Answering",
author = "Nematzadeh, Aida and
Burns, Kaylee and
Grant, Erin and
Gopnik, Alison and
Griffiths, Tom",
editor = "Riloff, Ellen and
Chiang, David and
Hockenmaier, Julia and
Tsujii, Jun{'}ichi",
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing",
month = oct # "-" # nov,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D18-1261",
doi = "10.18653/v1/D18-1261",
pages = "2392--2400",
abstract = "We propose a new dataset for evaluating question answering models with respect to their capacity to reason about beliefs. Our tasks are inspired by theory-of-mind experiments that examine whether children are able to reason about the beliefs of others, in particular when those beliefs differ from reality. We evaluate a number of recent neural models with memory augmentation. We find that all fail on our tasks, which require keeping track of inconsistent states of the world; moreover, the models{'} accuracy decreases notably when random sentences are introduced to the tasks at test.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nematzadeh-etal-2018-evaluating">
<titleInfo>
<title>Evaluating Theory of Mind in Question Answering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aida</namePart>
<namePart type="family">Nematzadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kaylee</namePart>
<namePart type="family">Burns</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Erin</namePart>
<namePart type="family">Grant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alison</namePart>
<namePart type="family">Gopnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Griffiths</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-oct-nov</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ellen</namePart>
<namePart type="family">Riloff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Chiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Hockenmaier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun’ichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Brussels, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We propose a new dataset for evaluating question answering models with respect to their capacity to reason about beliefs. Our tasks are inspired by theory-of-mind experiments that examine whether children are able to reason about the beliefs of others, in particular when those beliefs differ from reality. We evaluate a number of recent neural models with memory augmentation. We find that all fail on our tasks, which require keeping track of inconsistent states of the world; moreover, the models’ accuracy decreases notably when random sentences are introduced to the tasks at test.</abstract>
<identifier type="citekey">nematzadeh-etal-2018-evaluating</identifier>
<identifier type="doi">10.18653/v1/D18-1261</identifier>
<location>
<url>https://aclanthology.org/D18-1261</url>
</location>
<part>
<date>2018-oct-nov</date>
<extent unit="page">
<start>2392</start>
<end>2400</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Theory of Mind in Question Answering
%A Nematzadeh, Aida
%A Burns, Kaylee
%A Grant, Erin
%A Gopnik, Alison
%A Griffiths, Tom
%Y Riloff, Ellen
%Y Chiang, David
%Y Hockenmaier, Julia
%Y Tsujii, Jun’ichi
%S Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing
%D 2018
%8 oct nov
%I Association for Computational Linguistics
%C Brussels, Belgium
%F nematzadeh-etal-2018-evaluating
%X We propose a new dataset for evaluating question answering models with respect to their capacity to reason about beliefs. Our tasks are inspired by theory-of-mind experiments that examine whether children are able to reason about the beliefs of others, in particular when those beliefs differ from reality. We evaluate a number of recent neural models with memory augmentation. We find that all fail on our tasks, which require keeping track of inconsistent states of the world; moreover, the models’ accuracy decreases notably when random sentences are introduced to the tasks at test.
%R 10.18653/v1/D18-1261
%U https://aclanthology.org/D18-1261
%U https://doi.org/10.18653/v1/D18-1261
%P 2392-2400
Markdown (Informal)
[Evaluating Theory of Mind in Question Answering](https://aclanthology.org/D18-1261) (Nematzadeh et al., EMNLP 2018)
ACL
- Aida Nematzadeh, Kaylee Burns, Erin Grant, Alison Gopnik, and Tom Griffiths. 2018. Evaluating Theory of Mind in Question Answering. In Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pages 2392–2400, Brussels, Belgium. Association for Computational Linguistics.