@inproceedings{marro-2026-compositional,
title = "Compositional Meaning Representations in {LLM}s: a Critical Review of Probing Studies",
author = "Marro, R{\'e}my",
editor = "Mohammad, Saif M. and
Ousidhoum, Nedjma",
booktitle = "Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*{SEM} 2026)",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.starsem-conference.13/",
pages = "198--210",
ISBN = "979-8-89176-413-2",
abstract = "Large language models (LLMs) appear successful in emulating compositional language, yet it remains unclear what these results entail about their underlying compositional semantic representations. The probing classifier paradigm has emerged as a tool to remedy this. This paper proposes to critically review the findings of 24 probing studies targeting a wide range of linguistic and semantic phenomena. It proposes a taxonomy of probing tasks based on the linguistic primitives they presuppose, distinguishing four tiers: lexical semantics, the syntax{--}semantics interface, propositional semantics, and discourse and pragmatics. A gradient in representational evidence emerges: LLMs robustly encode lexical information, display less consistent sensitivity to structural relations within sentences, and obtain unsatisfactory results on tasks requiring propositional content, speech acts, or pragmatic inference. The review underscores the need for a clearer theoretical grounding of what probing tasks measure and reflects on how probing can illuminate the compositional pathways available within current language models."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="marro-2026-compositional">
<titleInfo>
<title>Compositional Meaning Representations in LLMs: a Critical Review of Probing Studies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rémy</namePart>
<namePart type="family">Marro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*SEM 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saif</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Mohammad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nedjma</namePart>
<namePart type="family">Ousidhoum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-413-2</identifier>
</relatedItem>
<abstract>Large language models (LLMs) appear successful in emulating compositional language, yet it remains unclear what these results entail about their underlying compositional semantic representations. The probing classifier paradigm has emerged as a tool to remedy this. This paper proposes to critically review the findings of 24 probing studies targeting a wide range of linguistic and semantic phenomena. It proposes a taxonomy of probing tasks based on the linguistic primitives they presuppose, distinguishing four tiers: lexical semantics, the syntax–semantics interface, propositional semantics, and discourse and pragmatics. A gradient in representational evidence emerges: LLMs robustly encode lexical information, display less consistent sensitivity to structural relations within sentences, and obtain unsatisfactory results on tasks requiring propositional content, speech acts, or pragmatic inference. The review underscores the need for a clearer theoretical grounding of what probing tasks measure and reflects on how probing can illuminate the compositional pathways available within current language models.</abstract>
<identifier type="citekey">marro-2026-compositional</identifier>
<location>
<url>https://aclanthology.org/2026.starsem-conference.13/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>198</start>
<end>210</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Compositional Meaning Representations in LLMs: a Critical Review of Probing Studies
%A Marro, Rémy
%Y Mohammad, Saif M.
%Y Ousidhoum, Nedjma
%S Proceedings of the 15th Joint Conference on Lexical and Computational Semantics (*SEM 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-413-2
%F marro-2026-compositional
%X Large language models (LLMs) appear successful in emulating compositional language, yet it remains unclear what these results entail about their underlying compositional semantic representations. The probing classifier paradigm has emerged as a tool to remedy this. This paper proposes to critically review the findings of 24 probing studies targeting a wide range of linguistic and semantic phenomena. It proposes a taxonomy of probing tasks based on the linguistic primitives they presuppose, distinguishing four tiers: lexical semantics, the syntax–semantics interface, propositional semantics, and discourse and pragmatics. A gradient in representational evidence emerges: LLMs robustly encode lexical information, display less consistent sensitivity to structural relations within sentences, and obtain unsatisfactory results on tasks requiring propositional content, speech acts, or pragmatic inference. The review underscores the need for a clearer theoretical grounding of what probing tasks measure and reflects on how probing can illuminate the compositional pathways available within current language models.
%U https://aclanthology.org/2026.starsem-conference.13/
%P 198-210
Markdown (Informal)
[Compositional Meaning Representations in LLMs: a Critical Review of Probing Studies](https://aclanthology.org/2026.starsem-conference.13/) (Marro, *SEM 2026)
ACL