BibTeX
@inproceedings{fu-etal-2024-vulnerabilities,
title = "Vulnerabilities of Large Language Models to Adversarial Attacks",
author = "Fu, Yu and
Shayegan, Erfan and
Abdullah, Md. Mamun Al and
Zaree, Pedram and
Abu-Ghazaleh, Nael and
Dong, Yue",
editor = "Chiruzzo, Luis and
Lee, Hung-yi and
Ribeiro, Leonardo",
booktitle = "Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 5: Tutorial Abstracts)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.luhme-tutorials.5/",
doi = "10.18653/v1/2024.acl-tutorials.5",
pages = "8--9",
abstract = "This tutorial serves as a comprehensive guide on the vulnerabilities of Large Language Models (LLMs) to adversarial attacks, an interdisciplinary field that blends perspectives from Natural Language Processing (NLP) and Cybersecurity. As LLMs become more complex and integrated into various systems, understanding their security attributes is crucial. However, current research indicates that even safety-aligned models are not impervious to adversarial attacks that can result in incorrect or harmful outputs. The tutorial first lays the foundation by explaining safety-aligned LLMs and concepts in cybersecurity. It then categorizes existing research based on different types of learning architectures and attack methods. We highlight the existing vulnerabilities of unimodal LLMs, multi-modal LLMs, and systems that integrate LLMs, focusing on adversarial attacks designed to exploit weaknesses and mislead AI systems. Finally, the tutorial delves into the potential causes of these vulnerabilities and discusses potential defense mechanisms."
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fu-etal-2024-vulnerabilities">
<titleInfo>
<title>Vulnerabilities of Large Language Models to Adversarial Attacks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Erfan</namePart>
<namePart type="family">Shayegan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Md.</namePart>
<namePart type="given">Mamun</namePart>
<namePart type="given">Al</namePart>
<namePart type="family">Abdullah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pedram</namePart>
<namePart type="family">Zaree</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nael</namePart>
<namePart type="family">Abu-Ghazaleh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Dong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 5: Tutorial Abstracts)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Chiruzzo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hung-yi</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leonardo</namePart>
<namePart type="family">Ribeiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This tutorial serves as a comprehensive guide on the vulnerabilities of Large Language Models (LLMs) to adversarial attacks, an interdisciplinary field that blends perspectives from Natural Language Processing (NLP) and Cybersecurity. As LLMs become more complex and integrated into various systems, understanding their security attributes is crucial. However, current research indicates that even safety-aligned models are not impervious to adversarial attacks that can result in incorrect or harmful outputs. The tutorial first lays the foundation by explaining safety-aligned LLMs and concepts in cybersecurity. It then categorizes existing research based on different types of learning architectures and attack methods. We highlight the existing vulnerabilities of unimodal LLMs, multi-modal LLMs, and systems that integrate LLMs, focusing on adversarial attacks designed to exploit weaknesses and mislead AI systems. Finally, the tutorial delves into the potential causes of these vulnerabilities and discusses potential defense mechanisms.</abstract>
<identifier type="citekey">fu-etal-2024-vulnerabilities</identifier>
<identifier type="doi">10.18653/v1/2024.acl-tutorials.5</identifier>
<location>
<url>https://aclanthology.org/2024.luhme-tutorials.5/</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>8</start>
<end>9</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Vulnerabilities of Large Language Models to Adversarial Attacks
%A Fu, Yu
%A Shayegan, Erfan
%A Abdullah, Md. Mamun Al
%A Zaree, Pedram
%A Abu-Ghazaleh, Nael
%A Dong, Yue
%Y Chiruzzo, Luis
%Y Lee, Hung-yi
%Y Ribeiro, Leonardo
%S Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 5: Tutorial Abstracts)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F fu-etal-2024-vulnerabilities
%X This tutorial serves as a comprehensive guide on the vulnerabilities of Large Language Models (LLMs) to adversarial attacks, an interdisciplinary field that blends perspectives from Natural Language Processing (NLP) and Cybersecurity. As LLMs become more complex and integrated into various systems, understanding their security attributes is crucial. However, current research indicates that even safety-aligned models are not impervious to adversarial attacks that can result in incorrect or harmful outputs. The tutorial first lays the foundation by explaining safety-aligned LLMs and concepts in cybersecurity. It then categorizes existing research based on different types of learning architectures and attack methods. We highlight the existing vulnerabilities of unimodal LLMs, multi-modal LLMs, and systems that integrate LLMs, focusing on adversarial attacks designed to exploit weaknesses and mislead AI systems. Finally, the tutorial delves into the potential causes of these vulnerabilities and discusses potential defense mechanisms.
%R 10.18653/v1/2024.acl-tutorials.5
%U https://aclanthology.org/2024.luhme-tutorials.5/
%U https://doi.org/10.18653/v1/2024.acl-tutorials.5
%P 8-9
Markdown (Informal)
[Vulnerabilities of Large Language Models to Adversarial Attacks](https://aclanthology.org/2024.luhme-tutorials.5/) (Fu et al., ACL 2024)
ACL
Yu Fu, Erfan Shayegan, Md. Mamun Al Abdullah, Pedram Zaree, Nael Abu-Ghazaleh, and Yue Dong. 2024. Vulnerabilities of Large Language Models to Adversarial Attacks. In Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 5: Tutorial Abstracts), pages 8–9, Bangkok, Thailand. Association for Computational Linguistics.