% Workshop paper in the 8th BlackboxNLP proceedings (ACL Anthology id 2025.blackboxnlp-1.28).
@inproceedings{kirch-etal-2025-features,
  author    = {Kirch, Nathalie Maria and Weisser, Constantin Niko and Field, Severin and Yannakoudakis, Helen and Casper, Stephen},
  title     = {What Features in Prompts Jailbreak {LLMs}? Investigating the Mechanisms Behind Attacks},
  editor    = {Belinkov, Yonatan and Mueller, Aaron and Kim, Najoung and Mohebbi, Hosein and Chen, Hanjie and Arad, Dana and Sarti, Gabriele},
  booktitle = {Proceedings of the 8th {BlackboxNLP} Workshop: Analyzing and Interpreting Neural Networks for {NLP}},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  pages     = {480--520},
  isbn      = {979-8-89176-346-3},
  url       = {https://aclanthology.org/2025.blackboxnlp-1.28/},
}