@inproceedings{wold-etal-2026-measuring,
title = "Measuring Idiomaticity in Text Embedding Models with epsilon-compositionality",
author = "Wold, Sondre and
Simon, {\'E}tienne and
Velldal, Erik and
{\O}vrelid, Lilja",
editor = "Demberg, Vera and
Inui, Kentaro and
Marquez, Llu{\'i}s",
booktitle = "Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.eacl-long.99/",
pages = "2239--2252",
ISBN = "979-8-89176-380-7",
abstract = "The principle of compositionality, which concerns the construction of meaning from constituent parts, is a longstanding topic in various disciplines, most commonly associated with formal semantics. In NLP, recent studies have focused on the compositional properties of text embedding models, particularly regarding their sensitivity to idiomatic expression, as idioms have traditionally been seen as non-compositional. In this paper, we argue that it is unclear how previous work relates to formal definitions of the principle. To address this limitation, we take a theoretically motivated approach based on definitions in formal semantics. We present $\varepsilon$-compositionality, a continuous relaxation of compositionality derived from these definitions. We measure $\varepsilon$-compositionality on a dataset containing both idiomatic and non-idiomatic sentences, providing a theoretically motivated assessment of sensitivity to idiomaticity. Our findings indicate that most text embedding models differentiate between idiomatic and non-idiomatic phrases, although to varying degrees."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wold-etal-2026-measuring">
<titleInfo>
<title>Measuring Idiomaticity in Text Embedding Models with epsilon-compositionality</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sondre</namePart>
<namePart type="family">Wold</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Étienne</namePart>
<namePart type="family">Simon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Erik</namePart>
<namePart type="family">Velldal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lilja</namePart>
<namePart type="family">Øvrelid</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Marquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-380-7</identifier>
</relatedItem>
<abstract>The principle of compositionality, which concerns the construction of meaning from constituent parts, is a longstanding topic in various disciplines, most commonly associated with formal semantics. In NLP, recent studies have focused on the compositional properties of text embedding models, particularly regarding their sensitivity to idiomatic expression, as idioms have traditionally been seen as non-compositional. In this paper, we argue that it is unclear how previous work relates to formal definitions of the principle. To address this limitation, we take a theoretically motivated approach based on definitions in formal semantics. We present ε-compositionality, a continuous relaxation of compositionality derived from these definitions. We measure ε-compositionality on a dataset containing both idiomatic and non-idiomatic sentences, providing a theoretically motivated assessment of sensitivity to idiomaticity. Our findings indicate that most text embedding models differentiate between idiomatic and non-idiomatic phrases, although to varying degrees.</abstract>
<identifier type="citekey">wold-etal-2026-measuring</identifier>
<location>
<url>https://aclanthology.org/2026.eacl-long.99/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>2239</start>
<end>2252</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Measuring Idiomaticity in Text Embedding Models with epsilon-compositionality
%A Wold, Sondre
%A Simon, Étienne
%A Velldal, Erik
%A Øvrelid, Lilja
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-380-7
%F wold-etal-2026-measuring
%X The principle of compositionality, which concerns the construction of meaning from constituent parts, is a longstanding topic in various disciplines, most commonly associated with formal semantics. In NLP, recent studies have focused on the compositional properties of text embedding models, particularly regarding their sensitivity to idiomatic expression, as idioms have traditionally been seen as non-compositional. In this paper, we argue that it is unclear how previous work relates to formal definitions of the principle. To address this limitation, we take a theoretically motivated approach based on definitions in formal semantics. We present ε-compositionality, a continuous relaxation of compositionality derived from these definitions. We measure ε-compositionality on a dataset containing both idiomatic and non-idiomatic sentences, providing a theoretically motivated assessment of sensitivity to idiomaticity. Our findings indicate that most text embedding models differentiate between idiomatic and non-idiomatic phrases, although to varying degrees.
%U https://aclanthology.org/2026.eacl-long.99/
%P 2239-2252
Markdown (Informal)
[Measuring Idiomaticity in Text Embedding Models with epsilon-compositionality](https://aclanthology.org/2026.eacl-long.99/) (Wold et al., EACL 2026)
ACL