@inproceedings{de-bruyn-etal-2022-smaller,
title = "Is It Smaller Than a Tennis Ball? Language Models Play the Game of Twenty Questions",
author = "De Bruyn, Maxime and
Lotfi, Ehsan and
Buhmann, Jeska and
Daelemans, Walter",
editor = "Bastings, Jasmijn and
Belinkov, Yonatan and
Elazar, Yanai and
Hupkes, Dieuwke and
Saphra, Naomi and
Wiegreffe, Sarah",
booktitle = "Proceedings of the Fifth BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.blackboxnlp-1.7",
doi = "10.18653/v1/2022.blackboxnlp-1.7",
pages = "80--90",
abstract = "Researchers often use games to analyze the abilities of Artificial Intelligence models. In this work, we use the game of Twenty Questions to study the world knowledge of language models. Despite its simplicity for humans, this game requires a broad knowledge of the world to answer yes/no questions. We evaluate several language models on this task and find that only the largest model has enough world knowledge to play it well, although it still has difficulties with the shape and size of objects. We also present a new method to improve the knowledge of smaller models by leveraging external information from the web. Finally, we release our dataset and Twentle, a website to interactively test the knowledge of language models by playing Twenty Questions.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="de-bruyn-etal-2022-smaller">
<titleInfo>
<title>Is It Smaller Than a Tennis Ball? Language Models Play the Game of Twenty Questions</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maxime</namePart>
<namePart type="family">De Bruyn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ehsan</namePart>
<namePart type="family">Lotfi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jeska</namePart>
<namePart type="family">Buhmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Walter</namePart>
<namePart type="family">Daelemans</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jasmijn</namePart>
<namePart type="family">Bastings</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yonatan</namePart>
<namePart type="family">Belinkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanai</namePart>
<namePart type="family">Elazar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dieuwke</namePart>
<namePart type="family">Hupkes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naomi</namePart>
<namePart type="family">Saphra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sarah</namePart>
<namePart type="family">Wiegreffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Researchers often use games to analyze the abilities of Artificial Intelligence models. In this work, we use the game of Twenty Questions to study the world knowledge of language models. Despite its simplicity for humans, this game requires a broad knowledge of the world to answer yes/no questions. We evaluate several language models on this task and find that only the largest model has enough world knowledge to play it well, although it still has difficulties with the shape and size of objects. We also present a new method to improve the knowledge of smaller models by leveraging external information from the web. Finally, we release our dataset and Twentle, a website to interactively test the knowledge of language models by playing Twenty Questions.</abstract>
<identifier type="citekey">de-bruyn-etal-2022-smaller</identifier>
<identifier type="doi">10.18653/v1/2022.blackboxnlp-1.7</identifier>
<location>
<url>https://aclanthology.org/2022.blackboxnlp-1.7</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>80</start>
<end>90</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Is It Smaller Than a Tennis Ball? Language Models Play the Game of Twenty Questions
%A De Bruyn, Maxime
%A Lotfi, Ehsan
%A Buhmann, Jeska
%A Daelemans, Walter
%Y Bastings, Jasmijn
%Y Belinkov, Yonatan
%Y Elazar, Yanai
%Y Hupkes, Dieuwke
%Y Saphra, Naomi
%Y Wiegreffe, Sarah
%S Proceedings of the Fifth BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F de-bruyn-etal-2022-smaller
%X Researchers often use games to analyze the abilities of Artificial Intelligence models. In this work, we use the game of Twenty Questions to study the world knowledge of language models. Despite its simplicity for humans, this game requires a broad knowledge of the world to answer yes/no questions. We evaluate several language models on this task and find that only the largest model has enough world knowledge to play it well, although it still has difficulties with the shape and size of objects. We also present a new method to improve the knowledge of smaller models by leveraging external information from the web. Finally, we release our dataset and Twentle, a website to interactively test the knowledge of language models by playing Twenty Questions.
%R 10.18653/v1/2022.blackboxnlp-1.7
%U https://aclanthology.org/2022.blackboxnlp-1.7
%U https://doi.org/10.18653/v1/2022.blackboxnlp-1.7
%P 80-90
Markdown (Informal)
[Is It Smaller Than a Tennis Ball? Language Models Play the Game of Twenty Questions](https://aclanthology.org/2022.blackboxnlp-1.7) (De Bruyn et al., BlackboxNLP 2022)
ACL