% BlackboxNLP 2025 workshop paper; ACL Anthology record 2025.blackboxnlp-1.28.
% In the title, the word Investigating is brace-protected because sentence-casing
% styles only keep a capital after a colon, not after a question mark.
@inproceedings{kirch-etal-2025-features,
    author    = {Kirch, Nathalie Maria and
                 Weisser, Constantin Niko and
                 Field, Severin and
                 Yannakoudakis, Helen and
                 Casper, Stephen},
    title     = {What Features in Prompts Jailbreak {LLMs}? {Investigating} the Mechanisms Behind Attacks},
    editor    = {Belinkov, Yonatan and
                 Mueller, Aaron and
                 Kim, Najoung and
                 Mohebbi, Hosein and
                 Chen, Hanjie and
                 Arad, Dana and
                 Sarti, Gabriele},
    booktitle = {Proceedings of the 8th {BlackboxNLP} Workshop: Analyzing and Interpreting Neural Networks for {NLP}},
    month     = nov,
    year      = {2025},
    address   = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    pages     = {480--520},
    doi       = {10.18653/v1/2025.blackboxnlp-1.28},
    url       = {https://aclanthology.org/2025.blackboxnlp-1.28/},
    isbn      = {979-8-89176-346-3},
}