@inproceedings{zhan-etal-2025-adaptive,
title = "Adaptive Attacks Break Defenses Against Indirect Prompt Injection Attacks on {LLM} Agents",
author = "Zhan, Qiusi and
Fang, Richard and
Panchal, Henil Shalin and
Kang, Daniel",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2025",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-naacl.395/",
doi = "10.18653/v1/2025.findings-naacl.395",
pages = "7101--7117",
ISBN = "979-8-89176-195-7",
    abstract = "Large Language Model (LLM) agents exhibit remarkable performance across diverse applications by using external tools to interact with environments. However, integrating external tools introduces security risks, such as indirect prompt injection (IPI) attacks. Despite defenses designed for IPI attacks, their robustness remains questionable due to insufficient testing against adaptive attacks. In this paper, we evaluate eight different defenses and bypass all of them using adaptive attacks, consistently achieving an attack success rate of over 50{\%}. This reveals critical vulnerabilities in current defenses. Our research underscores the need for adaptive attack evaluation when designing defenses to ensure robustness and reliability. The code is available at https://github.com/uiuc-kang-lab/AdaptiveAttackAgent."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhan-etal-2025-adaptive">
<titleInfo>
<title>Adaptive Attacks Break Defenses Against Indirect Prompt Injection Attacks on LLM Agents</title>
</titleInfo>
<name type="personal">
<namePart type="given">Qiusi</namePart>
<namePart type="family">Zhan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Fang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Henil</namePart>
<namePart type="given">Shalin</namePart>
<namePart type="family">Panchal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Kang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: NAACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Chiruzzo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-195-7</identifier>
</relatedItem>
<abstract>Large Language Model (LLM) agents exhibit remarkable performance across diverse applications by using external tools to interact with environments. However, integrating external tools introduces security risks, such as indirect prompt injection (IPI) attacks. Despite defenses designed for IPI attacks, their robustness remains questionable due to insufficient testing against adaptive attacks. In this paper, we evaluate eight different defenses and bypass all of them using adaptive attacks, consistently achieving an attack success rate of over 50%. This reveals critical vulnerabilities in current defenses. Our research underscores the need for adaptive attack evaluation when designing defenses to ensure robustness and reliability. The code is available at https://github.com/uiuc-kang-lab/AdaptiveAttackAgent.</abstract>
<identifier type="citekey">zhan-etal-2025-adaptive</identifier>
<identifier type="doi">10.18653/v1/2025.findings-naacl.395</identifier>
<location>
<url>https://aclanthology.org/2025.findings-naacl.395/</url>
</location>
<part>
<date>2025-04</date>
<extent unit="page">
<start>7101</start>
<end>7117</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Adaptive Attacks Break Defenses Against Indirect Prompt Injection Attacks on LLM Agents
%A Zhan, Qiusi
%A Fang, Richard
%A Panchal, Henil Shalin
%A Kang, Daniel
%Y Chiruzzo, Luis
%Y Ritter, Alan
%Y Wang, Lu
%S Findings of the Association for Computational Linguistics: NAACL 2025
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-195-7
%F zhan-etal-2025-adaptive
%X Large Language Model (LLM) agents exhibit remarkable performance across diverse applications by using external tools to interact with environments. However, integrating external tools introduces security risks, such as indirect prompt injection (IPI) attacks. Despite defenses designed for IPI attacks, their robustness remains questionable due to insufficient testing against adaptive attacks. In this paper, we evaluate eight different defenses and bypass all of them using adaptive attacks, consistently achieving an attack success rate of over 50%. This reveals critical vulnerabilities in current defenses. Our research underscores the need for adaptive attack evaluation when designing defenses to ensure robustness and reliability. The code is available at https://github.com/uiuc-kang-lab/AdaptiveAttackAgent.
%R 10.18653/v1/2025.findings-naacl.395
%U https://aclanthology.org/2025.findings-naacl.395/
%U https://doi.org/10.18653/v1/2025.findings-naacl.395
%P 7101-7117
Markdown (Informal)
[Adaptive Attacks Break Defenses Against Indirect Prompt Injection Attacks on LLM Agents](https://aclanthology.org/2025.findings-naacl.395/) (Zhan et al., Findings 2025)
ACL
Qiusi Zhan, Richard Fang, Henil Shalin Panchal, and Daniel Kang. 2025. Adaptive Attacks Break Defenses Against Indirect Prompt Injection Attacks on LLM Agents. In Findings of the Association for Computational Linguistics: NAACL 2025, pages 7101–7117, Albuquerque, New Mexico. Association for Computational Linguistics.