% Conference paper in the NAACL 2025 proceedings (Volume 1: Long Papers); the url field points to the ACL Anthology record.
@inproceedings{mu-etal-2025-stealthy,
  author    = {Mu, Honglin and He, Han and Zhou, Yuxin and Feng, Yunlong and Xu, Yang and Qin, Libo and Shi, Xiaoming and Liu, Zeming and Han, Xudong and Shi, Qi and Zhu, Qingfu and Che, Wanxiang},
  title     = {Stealthy Jailbreak Attacks on Large Language Models via Benign Data Mirroring},
  editor    = {Chiruzzo, Luis and Ritter, Alan and Wang, Lu},
  booktitle = {Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)},
  year      = {2025},
  month     = apr,
  pages     = {1784--1799},
  publisher = {Association for Computational Linguistics},
  address   = {Albuquerque, New Mexico},
  isbn      = {979-8-89176-189-6},
  url       = {https://aclanthology.org/2025.naacl-long.88/},
}