@inproceedings{sadr-etal-2026-zip,
title = "{ZIP}: Quantifying Which Words Matter in Zero-Shot Instructional Prompts",
author = "Sadr, Nikta Gohari and
Madhusudan, Sangmitra and
Asgari, Arash and
Sajjad, Hassan and
Seyyed-Kalantari, Laleh and
Emami, Ali",
editor = "Mohammad, Saif M. and
Ousidhoum, Nedjma",
booktitle = "Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*{SEM} 2026)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.starsem-conference.30/",
pages = "428--453",
ISBN = "979-8-89176-413-2",
abstract = "While zero-shot instructional prompts like ``Let{'}s think step-by-step'' have revolutionized Large Language Model performance, we lack systematic understanding of why: which specific words drive their effectiveness, and how do these patterns vary across tasks and models? We introduce the ZIP score (Zero-shot Importance of Perturbation), a metric that quantifies individual word importance through controlled, semantically meaningful perturbations. To enable rigorous evaluation, we also introduce the first ground-truth benchmark for prompt interpretability, a set of validation prompts with predetermined keywords where ZIP achieves 95.8{\%} accuracy compared to 65.8{\%} for LIME. Analyzing six flagship models across seven prompts and multiple task domains, we find that word importance is task-dependent (``step-by-step'' dominates mathematical reasoning; ``think'' matters more for common-sense tasks), varies systematically across model families, and correlates inversely with model performance, suggesting prompts have greatest impact on tasks where models struggle. Our findings advance prompt science, providing both practical guidance for prompt engineering and theoretical understanding of how instructional language shapes model behavior."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sadr-etal-2026-zip">
<titleInfo>
<title>ZIP: Quantifying Which Words Matter in Zero-Shot Instructional Prompts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nikta</namePart>
<namePart type="given">Gohari</namePart>
<namePart type="family">Sadr</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sangmitra</namePart>
<namePart type="family">Madhusudan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arash</namePart>
<namePart type="family">Asgari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hassan</namePart>
<namePart type="family">Sajjad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laleh</namePart>
<namePart type="family">Seyyed-Kalantari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ali</namePart>
<namePart type="family">Emami</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*SEM 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saif</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Mohammad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nedjma</namePart>
<namePart type="family">Ousidhoum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-413-2</identifier>
</relatedItem>
<abstract>While zero-shot instructional prompts like “Let’s think step-by-step” have revolutionized Large Language Model performance, we lack systematic understanding of why: which specific words drive their effectiveness, and how do these patterns vary across tasks and models? We introduce the ZIP score (Zero-shot Importance of Perturbation), a metric that quantifies individual word importance through controlled, semantically meaningful perturbations. To enable rigorous evaluation, we also introduce the first ground-truth benchmark for prompt interpretability, a set of validation prompts with predetermined keywords where ZIP achieves 95.8% accuracy compared to 65.8% for LIME. Analyzing six flagship models across seven prompts and multiple task domains, we find that word importance is task-dependent (“step-by-step” dominates mathematical reasoning; “think” matters more for common-sense tasks), varies systematically across model families, and correlates inversely with model performance, suggesting prompts have greatest impact on tasks where models struggle. Our findings advance prompt science, providing both practical guidance for prompt engineering and theoretical understanding of how instructional language shapes model behavior.</abstract>
<identifier type="citekey">sadr-etal-2026-zip</identifier>
<location>
<url>https://aclanthology.org/2026.starsem-conference.30/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>428</start>
<end>453</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ZIP: Quantifying Which Words Matter in Zero-Shot Instructional Prompts
%A Sadr, Nikta Gohari
%A Madhusudan, Sangmitra
%A Asgari, Arash
%A Sajjad, Hassan
%A Seyyed-Kalantari, Laleh
%A Emami, Ali
%Y Mohammad, Saif M.
%Y Ousidhoum, Nedjma
%S Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*SEM 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-413-2
%F sadr-etal-2026-zip
%X While zero-shot instructional prompts like “Let’s think step-by-step” have revolutionized Large Language Model performance, we lack systematic understanding of why: which specific words drive their effectiveness, and how do these patterns vary across tasks and models? We introduce the ZIP score (Zero-shot Importance of Perturbation), a metric that quantifies individual word importance through controlled, semantically meaningful perturbations. To enable rigorous evaluation, we also introduce the first ground-truth benchmark for prompt interpretability, a set of validation prompts with predetermined keywords where ZIP achieves 95.8% accuracy compared to 65.8% for LIME. Analyzing six flagship models across seven prompts and multiple task domains, we find that word importance is task-dependent (“step-by-step” dominates mathematical reasoning; “think” matters more for common-sense tasks), varies systematically across model families, and correlates inversely with model performance, suggesting prompts have greatest impact on tasks where models struggle. Our findings advance prompt science, providing both practical guidance for prompt engineering and theoretical understanding of how instructional language shapes model behavior.
%U https://aclanthology.org/2026.starsem-conference.30/
%P 428-453
Markdown (Informal)
[ZIP: Quantifying Which Words Matter in Zero-Shot Instructional Prompts](https://aclanthology.org/2026.starsem-conference.30/) (Sadr et al., *SEM 2026)
ACL
- Nikta Gohari Sadr, Sangmitra Madhusudan, Arash Asgari, Hassan Sajjad, Laleh Seyyed-Kalantari, and Ali Emami. 2026. ZIP: Quantifying Which Words Matter in Zero-Shot Instructional Prompts. In Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*SEM 2026), pages 428–453, San Diego, California, United States. Association for Computational Linguistics.