@inproceedings{vegner-etal-2025-behavioural,
title = "Behavioural vs. Representational Systematicity in End-to-End Models: An Opinionated Survey",
author = "Vegner, Ivan and
de Souza, Sydelle and
Forch, Valentin and
Lewis, Martha and
Doumas, Leonidas A. A.",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.acl-long.1537/",
doi = "10.18653/v1/2025.acl-long.1537",
pages = "31842--31856",
ISBN = "979-8-89176-251-0",
abstract = "A core aspect of compositionality, systematicity is a desirable property in ML models as it enables strong generalization to novel contexts. This has led to numerous studies proposing benchmarks to assess systematic generalization, as well as models and training regimes designed to enhance it. Many of these efforts are framed as addressing the challenge posed by Fodor and Pylyshyn. However, while they argue for systematicity of representations, existing benchmarks and models primarily focus on the systematicity of behaviour. We emphasize the crucial nature of this distinction. Furthermore, building on Hadley{'}s (1994) taxonomy of systematic generalization, we analyze the extent to which behavioural systematicity is tested by key benchmarks in the literature across language and vision. Finally, we highlight ways of assessing systematicity of representations in ML models as practiced in the field of mechanistic interpretability."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vegner-etal-2025-behavioural">
<titleInfo>
<title>Behavioural vs. Representational Systematicity in End-to-End Models: An Opinionated Survey</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="family">Vegner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sydelle</namePart>
<namePart type="family">de Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valentin</namePart>
<namePart type="family">Forch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martha</namePart>
<namePart type="family">Lewis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leonidas</namePart>
<namePart type="given">A</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Doumas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>A core aspect of compositionality, systematicity is a desirable property in ML models as it enables strong generalization to novel contexts. This has led to numerous studies proposing benchmarks to assess systematic generalization, as well as models and training regimes designed to enhance it. Many of these efforts are framed as addressing the challenge posed by Fodor and Pylyshyn. However, while they argue for systematicity of representations, existing benchmarks and models primarily focus on the systematicity of behaviour. We emphasize the crucial nature of this distinction. Furthermore, building on Hadley’s (1994) taxonomy of systematic generalization, we analyze the extent to which behavioural systematicity is tested by key benchmarks in the literature across language and vision. Finally, we highlight ways of assessing systematicity of representations in ML models as practiced in the field of mechanistic interpretability.</abstract>
<identifier type="citekey">vegner-etal-2025-behavioural</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.1537</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.1537/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>31842</start>
<end>31856</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Behavioural vs. Representational Systematicity in End-to-End Models: An Opinionated Survey
%A Vegner, Ivan
%A de Souza, Sydelle
%A Forch, Valentin
%A Lewis, Martha
%A Doumas, Leonidas A. A.
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F vegner-etal-2025-behavioural
%X A core aspect of compositionality, systematicity is a desirable property in ML models as it enables strong generalization to novel contexts. This has led to numerous studies proposing benchmarks to assess systematic generalization, as well as models and training regimes designed to enhance it. Many of these efforts are framed as addressing the challenge posed by Fodor and Pylyshyn. However, while they argue for systematicity of representations, existing benchmarks and models primarily focus on the systematicity of behaviour. We emphasize the crucial nature of this distinction. Furthermore, building on Hadley’s (1994) taxonomy of systematic generalization, we analyze the extent to which behavioural systematicity is tested by key benchmarks in the literature across language and vision. Finally, we highlight ways of assessing systematicity of representations in ML models as practiced in the field of mechanistic interpretability.
%R 10.18653/v1/2025.acl-long.1537
%U https://aclanthology.org/2025.acl-long.1537/
%U https://doi.org/10.18653/v1/2025.acl-long.1537
%P 31842-31856
Markdown (Informal)
[Behavioural vs. Representational Systematicity in End-to-End Models: An Opinionated Survey](https://aclanthology.org/2025.acl-long.1537/) (Vegner et al., ACL 2025)
ACL