@inproceedings{ranjan-2026-one,
title = "One Word Is Not Enough: Simple Prompts Improve Word Embeddings",
author = "Ranjan, Rajeev",
editor = "Mohammad, Saif M. and
Ousidhoum, Nedjma",
booktitle = "Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*{SEM} 2026)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.starsem-conference.32/",
pages = "464--473",
ISBN = "979-8-89176-413-2",
abstract = "Text embedding models are designed for sentence-level applications like retrieval and semantic similarity, and are primarily evaluated on sentence-level benchmarks. Their behavior on isolated words is less understood. We show that simply prepending semantic prompts to words before embedding substantially improves word similarity correlations. Testing 7 text embedding models, including text-embedding-3-large (OpenAI), embed-english-v3.0 (Cohere), voyage-3 (Voyage AI), all-mpnet-base-v2, and Qwen3-Embedding-8B, on 3 standard benchmarks (SimLex-999, WordSim-353, MEN-3000), we find that prompts like ``meaning: {word}'' or ``Represent the semantic concept: {word}'' improve Spearman correlations by up to +0.28 on SimLex-999. Some models fail completely on bare words ({\ensuremath{\rho}} {\ensuremath{\approx}} 0) but recover with prompts (+0.73 improvement). Our best results achieve {\ensuremath{\rho}}=0.692 on SimLex-999 with embed-english-v3.0 (Cohere), {\ensuremath{\rho}}=0.811 on WordSim-353, and {\ensuremath{\rho}}=0.855 on MEN-3000 with text-embedding-3-large (OpenAI). These results outperform classic static embeddings like Word2Vec ({\ensuremath{\rho}}=0.40) and even the best static method LexVec ({\ensuremath{\rho}}=0.48) on SimLex-999, establishing a new state-of-the-art for pure embedding methods. This zero-shot technique requires no training and works with any text embedding model."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ranjan-2026-one">
<titleInfo>
<title>One Word Is Not Enough: Simple Prompts Improve Word Embeddings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rajeev</namePart>
<namePart type="family">Ranjan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*SEM 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saif</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Mohammad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nedjma</namePart>
<namePart type="family">Ousidhoum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-413-2</identifier>
</relatedItem>
<abstract>Text embedding models are designed for sentence-level applications like retrieval and semantic similarity, and are primarily evaluated on sentence-level benchmarks. Their behavior on isolated words is less understood. We show that simply prepending semantic prompts to words before embedding substantially improves word similarity correlations. Testing 7 text embedding models, including text-embedding-3-large (OpenAI), embed-english-v3.0 (Cohere), voyage-3 (Voyage AI), all-mpnet-base-v2, and Qwen3-Embedding-8B, on 3 standard benchmarks (SimLex-999, WordSim-353, MEN-3000), we find that prompts like “meaning: {word}” or “Represent the semantic concept: {word}” improve Spearman correlations by up to +0.28 on SimLex-999. Some models fail completely on bare words (ρ ≈ 0) but recover with prompts (+0.73 improvement). Our best results achieve ρ=0.692 on SimLex-999 with embed-english-v3.0 (Cohere), ρ=0.811 on WordSim-353, and ρ=0.855 on MEN-3000 with text-embedding-3-large (OpenAI). These results outperform classic static embeddings like Word2Vec (ρ=0.40) and even the best static method LexVec (ρ=0.48) on SimLex-999, establishing a new state-of-the-art for pure embedding methods. This zero-shot technique requires no training and works with any text embedding model.</abstract>
<identifier type="citekey">ranjan-2026-one</identifier>
<location>
<url>https://aclanthology.org/2026.starsem-conference.32/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>464</start>
<end>473</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T One Word Is Not Enough: Simple Prompts Improve Word Embeddings
%A Ranjan, Rajeev
%Y Mohammad, Saif M.
%Y Ousidhoum, Nedjma
%S Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*SEM 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-413-2
%F ranjan-2026-one
%X Text embedding models are designed for sentence-level applications like retrieval and semantic similarity, and are primarily evaluated on sentence-level benchmarks. Their behavior on isolated words is less understood. We show that simply prepending semantic prompts to words before embedding substantially improves word similarity correlations. Testing 7 text embedding models, including text-embedding-3-large (OpenAI), embed-english-v3.0 (Cohere), voyage-3 (Voyage AI), all-mpnet-base-v2, and Qwen3-Embedding-8B, on 3 standard benchmarks (SimLex-999, WordSim-353, MEN-3000), we find that prompts like “meaning: {word}” or “Represent the semantic concept: {word}” improve Spearman correlations by up to +0.28 on SimLex-999. Some models fail completely on bare words (ρ ≈ 0) but recover with prompts (+0.73 improvement). Our best results achieve ρ=0.692 on SimLex-999 with embed-english-v3.0 (Cohere), ρ=0.811 on WordSim-353, and ρ=0.855 on MEN-3000 with text-embedding-3-large (OpenAI). These results outperform classic static embeddings like Word2Vec (ρ=0.40) and even the best static method LexVec (ρ=0.48) on SimLex-999, establishing a new state-of-the-art for pure embedding methods. This zero-shot technique requires no training and works with any text embedding model.
%U https://aclanthology.org/2026.starsem-conference.32/
%P 464-473
Markdown (Informal)
[One Word Is Not Enough: Simple Prompts Improve Word Embeddings](https://aclanthology.org/2026.starsem-conference.32/) (Ranjan, *SEM 2026)
ACL