@inproceedings{melamed-etal-2025-demystifying,
title = "Demystifying optimized prompts in language models",
author = "Melamed, Rimon and
McCabe, Lucas Hurley and
Huang, H Howie",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-main.147/",
pages = "2983--2999",
ISBN = "979-8-89176-332-6",
abstract = "Modern language models (LMs) are not robust to out-of-distribution inputs. Machine generated ({``}optimized'') prompts can be used to modulate LM outputs and induce specific behaviors while appearing completely uninterpretable. In this work, we investigate the composition of optimized prompts, as well as the mechanisms by which LMs parse and build predictions from optimized prompts. We find that optimized prompts primarily consist of punctuation and noun tokens which are more rare in the training data. Internally, optimized prompts are clearly distinguishable from natural language counterparts based on sparse subsets of the model{'}s activations. Across various families of instruction-tuned models, optimized prompts follow a similar path in how their representations form through the network."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="melamed-etal-2025-demystifying">
    <titleInfo>
      <title>Demystifying optimized prompts in language models</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Rimon</namePart>
      <namePart type="family">Melamed</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lucas</namePart>
      <namePart type="given">Hurley</namePart>
      <namePart type="family">McCabe</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">H</namePart>
      <namePart type="given">Howie</namePart>
      <namePart type="family">Huang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Christos</namePart>
        <namePart type="family">Christodoulopoulos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tanmoy</namePart>
        <namePart type="family">Chakraborty</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Carolyn</namePart>
        <namePart type="family">Rose</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Violet</namePart>
        <namePart type="family">Peng</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Suzhou, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-332-6</identifier>
    </relatedItem>
    <abstract>Modern language models (LMs) are not robust to out-of-distribution inputs. Machine generated (“optimized”) prompts can be used to modulate LM outputs and induce specific behaviors while appearing completely uninterpretable. In this work, we investigate the composition of optimized prompts, as well as the mechanisms by which LMs parse and build predictions from optimized prompts. We find that optimized prompts primarily consist of punctuation and noun tokens which are more rare in the training data. Internally, optimized prompts are clearly distinguishable from natural language counterparts based on sparse subsets of the model’s activations. Across various families of instruction-tuned models, optimized prompts follow a similar path in how their representations form through the network.</abstract>
    <identifier type="citekey">melamed-etal-2025-demystifying</identifier>
    <location>
      <url>https://aclanthology.org/2025.emnlp-main.147/</url>
    </location>
    <part>
      <date>2025-11</date>
      <extent unit="page">
        <start>2983</start>
        <end>2999</end>
      </extent>
    </part>
  </mods>
</modsCollection>

%0 Conference Proceedings
%T Demystifying optimized prompts in language models
%A Melamed, Rimon
%A McCabe, Lucas Hurley
%A Huang, H. Howie
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-332-6
%F melamed-etal-2025-demystifying
%X Modern language models (LMs) are not robust to out-of-distribution inputs. Machine generated (“optimized”) prompts can be used to modulate LM outputs and induce specific behaviors while appearing completely uninterpretable. In this work, we investigate the composition of optimized prompts, as well as the mechanisms by which LMs parse and build predictions from optimized prompts. We find that optimized prompts primarily consist of punctuation and noun tokens which are more rare in the training data. Internally, optimized prompts are clearly distinguishable from natural language counterparts based on sparse subsets of the model’s activations. Across various families of instruction-tuned models, optimized prompts follow a similar path in how their representations form through the network.
%U https://aclanthology.org/2025.emnlp-main.147/
%P 2983-2999

Markdown (Informal)
[Demystifying optimized prompts in language models](https://aclanthology.org/2025.emnlp-main.147/) (Melamed et al., EMNLP 2025)
ACL
Rimon Melamed, Lucas Hurley McCabe, and H Howie Huang. 2025. Demystifying optimized prompts in language models. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing, pages 2983–2999, Suzhou, China. Association for Computational Linguistics.