@inproceedings{sadasivan-etal-2026-attackers,
title = "Attacker{'}s Noise Can Manipulate Your Audio-based {LLM} in the Real World",
author = "Sadasivan, Vinu Sankar and
Feizi, Soheil and
Mathews, Rajiv and
Wang, Lun",
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.eacl-long.66/",
pages = "1430--1440",
    isbn = "979-8-89176-380-7",
abstract = "This paper investigates the real-world vulnerabilities of audio-based large language models (ALLMs), such as Qwen2-Audio. We first demonstrate that an adversary can craft stealthy audio perturbations to manipulate ALLMs into exhibiting specific targeted behaviors, such as eliciting responses to wake-keywords (e.g., ``Hey Qwen''), or triggering harmful behaviors (e.g., ``Change my calendar event''). Subsequently, we show that playing adversarial background noise during user interaction with the ALLMs can significantly degrade the response quality. Crucially, our research illustrates the scalability of these attacks to real-world scenarios, impacting other innocent users when these adversarial noises are played through the air. Further, we discuss the transferability of the attack and potential defensive measures."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sadasivan-etal-2026-attackers">
<titleInfo>
<title>Attacker’s Noise Can Manipulate Your Audio-based LLM in the Real World</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vinu</namePart>
<namePart type="given">Sankar</namePart>
<namePart type="family">Sadasivan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Soheil</namePart>
<namePart type="family">Feizi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rajiv</namePart>
<namePart type="family">Mathews</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lun</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Marquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-380-7</identifier>
</relatedItem>
<abstract>This paper investigates the real-world vulnerabilities of audio-based large language models (ALLMs), such as Qwen2-Audio. We first demonstrate that an adversary can craft stealthy audio perturbations to manipulate ALLMs into exhibiting specific targeted behaviors, such as eliciting responses to wake-keywords (e.g., “Hey Qwen”), or triggering harmful behaviors (e.g., “Change my calendar event”). Subsequently, we show that playing adversarial background noise during user interaction with the ALLMs can significantly degrade the response quality. Crucially, our research illustrates the scalability of these attacks to real-world scenarios, impacting other innocent users when these adversarial noises are played through the air. Further, we discuss the transferability of the attack and potential defensive measures.</abstract>
<identifier type="citekey">sadasivan-etal-2026-attackers</identifier>
<location>
<url>https://aclanthology.org/2026.eacl-long.66/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>1430</start>
<end>1440</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Attacker’s Noise Can Manipulate Your Audio-based LLM in the Real World
%A Sadasivan, Vinu Sankar
%A Feizi, Soheil
%A Mathews, Rajiv
%A Wang, Lun
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-380-7
%F sadasivan-etal-2026-attackers
%X This paper investigates the real-world vulnerabilities of audio-based large language models (ALLMs), such as Qwen2-Audio. We first demonstrate that an adversary can craft stealthy audio perturbations to manipulate ALLMs into exhibiting specific targeted behaviors, such as eliciting responses to wake-keywords (e.g., “Hey Qwen”), or triggering harmful behaviors (e.g., “Change my calendar event”). Subsequently, we show that playing adversarial background noise during user interaction with the ALLMs can significantly degrade the response quality. Crucially, our research illustrates the scalability of these attacks to real-world scenarios, impacting other innocent users when these adversarial noises are played through the air. Further, we discuss the transferability of the attack and potential defensive measures.
%U https://aclanthology.org/2026.eacl-long.66/
%P 1430-1440
Markdown (Informal)
[Attacker’s Noise Can Manipulate Your Audio-based LLM in the Real World](https://aclanthology.org/2026.eacl-long.66/) (Sadasivan et al., EACL 2026)
ACL
- Vinu Sankar Sadasivan, Soheil Feizi, Rajiv Mathews, and Lun Wang. 2026. Attacker’s Noise Can Manipulate Your Audio-based LLM in the Real World. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers), pages 1430–1440, Rabat, Morocco. Association for Computational Linguistics.