@inproceedings{manna-etal-2024-riddle,
title = "Riddle Me This: Evaluating Large Language Models in Solving Word-Based Games",
author = "Manna, Raffaele and
di Buono, Maria Pia and
Monti, Johanna",
editor = "Madge, Chris and
Chamberlain, Jon and
Fort, Karen and
Kruschwitz, Udo and
Lukin, Stephanie",
booktitle = "Proceedings of the 10th Workshop on Games and Natural Language Processing @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.games-1.11",
pages = "97--106",
abstract = "In this contribution, we examine the proficiency of Large Language Models (LLMs) in solving the linguistic game {``}La Ghigliottina,{''} the final game of the popular Italian TV quiz show {``}L{'}Eredit{\`a}{''}. This game is particularly challenging as it requires LLMs to engage in semantic inference reasoning for identifying the solutions of the game. Our experiment draws inspiration from Ghigliottin-AI, a task of EVALITA 2020, an evaluation campaign focusing on Natural Language Processing (NLP) and speech tools designed for the Italian language. To benchmark our experiment, we use the results of the most successful artificial player in this task, namely Il Mago della Ghigliottina. The paper describes the experimental setting and the results which show that LLMs perform poorly.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="manna-etal-2024-riddle">
<titleInfo>
<title>Riddle Me This: Evaluating Large Language Models in Solving Word-Based Games</title>
</titleInfo>
<name type="personal">
<namePart type="given">Raffaele</namePart>
<namePart type="family">Manna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Pia</namePart>
<namePart type="family">di Buono</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johanna</namePart>
<namePart type="family">Monti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th Workshop on Games and Natural Language Processing @ LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Madge</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jon</namePart>
<namePart type="family">Chamberlain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Karen</namePart>
<namePart type="family">Fort</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Udo</namePart>
<namePart type="family">Kruschwitz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stephanie</namePart>
<namePart type="family">Lukin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this contribution, we examine the proficiency of Large Language Models (LLMs) in solving the linguistic game “La Ghigliottina,” the final game of the popular Italian TV quiz show “L’Eredità”. This game is particularly challenging as it requires LLMs to engage in semantic inference reasoning for identifying the solutions of the game. Our experiment draws inspiration from Ghigliottin-AI, a task of EVALITA 2020, an evaluation campaign focusing on Natural Language Processing (NLP) and speech tools designed for the Italian language. To benchmark our experiment, we use the results of the most successful artificial player in this task, namely Il Mago della Ghigliottina. The paper describes the experimental setting and the results which show that LLMs perform poorly.</abstract>
<identifier type="citekey">manna-etal-2024-riddle</identifier>
<location>
<url>https://aclanthology.org/2024.games-1.11</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>97</start>
<end>106</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Riddle Me This: Evaluating Large Language Models in Solving Word-Based Games
%A Manna, Raffaele
%A di Buono, Maria Pia
%A Monti, Johanna
%Y Madge, Chris
%Y Chamberlain, Jon
%Y Fort, Karen
%Y Kruschwitz, Udo
%Y Lukin, Stephanie
%S Proceedings of the 10th Workshop on Games and Natural Language Processing @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F manna-etal-2024-riddle
%X In this contribution, we examine the proficiency of Large Language Models (LLMs) in solving the linguistic game “La Ghigliottina,” the final game of the popular Italian TV quiz show “L’Eredità”. This game is particularly challenging as it requires LLMs to engage in semantic inference reasoning for identifying the solutions of the game. Our experiment draws inspiration from Ghigliottin-AI, a task of EVALITA 2020, an evaluation campaign focusing on Natural Language Processing (NLP) and speech tools designed for the Italian language. To benchmark our experiment, we use the results of the most successful artificial player in this task, namely Il Mago della Ghigliottina. The paper describes the experimental setting and the results which show that LLMs perform poorly.
%U https://aclanthology.org/2024.games-1.11
%P 97-106
Markdown (Informal)
[Riddle Me This: Evaluating Large Language Models in Solving Word-Based Games](https://aclanthology.org/2024.games-1.11) (Manna et al., games-WS 2024)
ACL