% BibTeX record for the paper published at
% https://aclanthology.org/2025.findings-emnlp.1082/ (see the url/doi fields).
% NOTE(review): `address` holds "Suzhou, China", which looks like the
% conference venue rather than the publisher's city -- confirm before reuse
% with styles that print it next to the publisher.
@inproceedings{maskey-etal-2025-benchmarking,
  title     = {Benchmarking Large Language Models for Cryptanalysis and Side-Channel Vulnerabilities},
  author    = {Maskey, Utsav and
               Zhu, Chencheng and
               Naseem, Usman},
  editor    = {Christodoulopoulos, Christos and
               Chakraborty, Tanmoy and
               Rose, Carolyn and
               Peng, Violet},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2025},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-emnlp.1082/},
  doi       = {10.18653/v1/2025.findings-emnlp.1082},
  pages     = {19849--19865},
  isbn      = {979-8-89176-335-7},
  abstract  = {Recent advancements in Large Language Models (LLMs) have transformed natural language understanding and generation, leading to extensive benchmarking across diverse tasks. However, cryptanalysis{---}a critical area for data security and its connection to LLMs' generalization abilities remains underexplored in LLM evaluations. To address this gap, we evaluate the cryptanalytic potential of state{-}of{-}the{-}art LLMs on ciphertexts produced by a range of cryptographic algorithms. We introduce a benchmark dataset of diverse plaintexts{---}spanning multiple domains, lengths, writing styles, and topics{---}paired with their encrypted versions. Using zero{-}shot and few{-}shot settings along with chain{-}of{-}thought prompting, we assess LLMs' decryption success rate and discuss their comprehension abilities. Our findings reveal key insights into LLMs' strengths and limitations in side{-}channel scenarios and raise concerns about their susceptibility to under-generalization related attacks. This research highlights the dual{-}use nature of LLMs in security contexts and contributes to the ongoing discussion on AI safety and security.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for citekey maskey-etal-2025-benchmarking:
       the paper's own metadata sits directly under <mods>; the containing
       proceedings volume (editors, publisher, place, ISBN) is described by
       the <relatedItem type="host"> element. -->
  <mods ID="maskey-etal-2025-benchmarking">
    <titleInfo>
      <title>Benchmarking Large Language Models for Cryptanalysis and Side-Channel Vulnerabilities</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Utsav</namePart>
      <namePart type="family">Maskey</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chencheng</namePart>
      <namePart type="family">Zhu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Usman</namePart>
      <namePart type="family">Naseem</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Christos</namePart>
        <namePart type="family">Christodoulopoulos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tanmoy</namePart>
        <namePart type="family">Chakraborty</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Carolyn</namePart>
        <namePart type="family">Rose</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Violet</namePart>
        <namePart type="family">Peng</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Suzhou, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-335-7</identifier>
    </relatedItem>
    <abstract>Recent advancements in Large Language Models (LLMs) have transformed natural language understanding and generation, leading to extensive benchmarking across diverse tasks. However, cryptanalysis—a critical area for data security and its connection to LLMs’ generalization abilities remains underexplored in LLM evaluations. To address this gap, we evaluate the cryptanalytic potential of state-of-the-art LLMs on ciphertexts produced by a range of cryptographic algorithms. We introduce a benchmark dataset of diverse plaintexts—spanning multiple domains, lengths, writing styles, and topics—paired with their encrypted versions. Using zero-shot and few-shot settings along with chain-of-thought prompting, we assess LLMs’ decryption success rate and discuss their comprehension abilities. Our findings reveal key insights into LLMs’ strengths and limitations in side-channel scenarios and raise concerns about their susceptibility to under-generalization related attacks. This research highlights the dual-use nature of LLMs in security contexts and contributes to the ongoing discussion on AI safety and security.</abstract>
    <identifier type="citekey">maskey-etal-2025-benchmarking</identifier>
    <identifier type="doi">10.18653/v1/2025.findings-emnlp.1082</identifier>
    <location>
      <url>https://aclanthology.org/2025.findings-emnlp.1082/</url>
    </location>
    <part>
      <date>2025-11</date>
      <extent unit="page">
        <start>19849</start>
        <end>19865</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Benchmarking Large Language Models for Cryptanalysis and Side-Channel Vulnerabilities
%A Maskey, Utsav
%A Zhu, Chencheng
%A Naseem, Usman
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F maskey-etal-2025-benchmarking
%X Recent advancements in Large Language Models (LLMs) have transformed natural language understanding and generation, leading to extensive benchmarking across diverse tasks. However, cryptanalysis—a critical area for data security and its connection to LLMs’ generalization abilities remains underexplored in LLM evaluations. To address this gap, we evaluate the cryptanalytic potential of state-of-the-art LLMs on ciphertexts produced by a range of cryptographic algorithms. We introduce a benchmark dataset of diverse plaintexts—spanning multiple domains, lengths, writing styles, and topics—paired with their encrypted versions. Using zero-shot and few-shot settings along with chain-of-thought prompting, we assess LLMs’ decryption success rate and discuss their comprehension abilities. Our findings reveal key insights into LLMs’ strengths and limitations in side-channel scenarios and raise concerns about their susceptibility to under-generalization related attacks. This research highlights the dual-use nature of LLMs in security contexts and contributes to the ongoing discussion on AI safety and security.
%R 10.18653/v1/2025.findings-emnlp.1082
%U https://aclanthology.org/2025.findings-emnlp.1082/
%U https://doi.org/10.18653/v1/2025.findings-emnlp.1082
%P 19849-19865
Markdown (Informal)
[Benchmarking Large Language Models for Cryptanalysis and Side-Channel Vulnerabilities](https://aclanthology.org/2025.findings-emnlp.1082/) (Maskey et al., Findings 2025)
ACL