@inproceedings{sadallah-etal-2025-makes,
title = "What Makes Cryptic Crosswords Challenging for {LLM}s?",
author = "Sadallah, Abdelrahman and
Kotova, Daria and
Kochmar, Ekaterina",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Eugenio, Barbara Di and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-main.342/",
pages = "5102--5114",
abstract = "Cryptic crosswords are puzzles that rely on general knowledge and the solver`s ability to manipulate language on different levels, dealing with various types of wordplay. Previous research suggests that solving such puzzles is challenging even for modern NLP models, including Large Language Models (LLMs). However, there is little to no research on the reasons for their poor performance on this task. In this paper, we establish the benchmark results for three popular LLMs: Gemma2, LLaMA3 and ChatGPT, showing that their performance on this task is still significantly below that of humans. We also investigate why these models struggle to achieve superior performance. We release our code and introduced datasets at https://github.com/bodasadallah/decrypting-crosswords."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sadallah-etal-2025-makes">
<titleInfo>
<title>What Makes Cryptic Crosswords Challenging for LLMs?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Abdelrahman</namePart>
<namePart type="family">Sadallah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daria</namePart>
<namePart type="family">Kotova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Kochmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="given">Di</namePart>
<namePart type="family">Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Cryptic crosswords are puzzles that rely on general knowledge and the solver‘s ability to manipulate language on different levels, dealing with various types of wordplay. Previous research suggests that solving such puzzles is challenging even for modern NLP models, including Large Language Models (LLMs). However, there is little to no research on the reasons for their poor performance on this task. In this paper, we establish the benchmark results for three popular LLMs: Gemma2, LLaMA3 and ChatGPT, showing that their performance on this task is still significantly below that of humans. We also investigate why these models struggle to achieve superior performance. We release our code and introduced datasets at https://github.com/bodasadallah/decrypting-crosswords.</abstract>
<identifier type="citekey">sadallah-etal-2025-makes</identifier>
<location>
<url>https://aclanthology.org/2025.coling-main.342/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>5102</start>
<end>5114</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T What Makes Cryptic Crosswords Challenging for LLMs?
%A Sadallah, Abdelrahman
%A Kotova, Daria
%A Kochmar, Ekaterina
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Eugenio, Barbara Di
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F sadallah-etal-2025-makes
%X Cryptic crosswords are puzzles that rely on general knowledge and the solver‘s ability to manipulate language on different levels, dealing with various types of wordplay. Previous research suggests that solving such puzzles is challenging even for modern NLP models, including Large Language Models (LLMs). However, there is little to no research on the reasons for their poor performance on this task. In this paper, we establish the benchmark results for three popular LLMs: Gemma2, LLaMA3 and ChatGPT, showing that their performance on this task is still significantly below that of humans. We also investigate why these models struggle to achieve superior performance. We release our code and introduced datasets at https://github.com/bodasadallah/decrypting-crosswords.
%U https://aclanthology.org/2025.coling-main.342/
%P 5102-5114
Markdown (Informal)
[What Makes Cryptic Crosswords Challenging for LLMs?](https://aclanthology.org/2025.coling-main.342/) (Sadallah et al., COLING 2025)
ACL
- Abdelrahman Sadallah, Daria Kotova, and Ekaterina Kochmar. 2025. What Makes Cryptic Crosswords Challenging for LLMs?. In Proceedings of the 31st International Conference on Computational Linguistics, pages 5102–5114, Abu Dhabi, UAE. Association for Computational Linguistics.