@article{fodor-etal-2025-compositionality,
title = "Compositionality and Sentence Meaning: Comparing Semantic Parsing and Transformers on a Challenging Sentence Similarity Dataset",
author = "Fodor, James and
Deyne, Simon De and
Suzuki, Shinsuke",
journal = "Computational Linguistics",
volume = "51",
number = "1",
month = mar,
year = "2025",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2025.cl-1.5/",
doi = "10.1162/coli_a_00536",
pages = "139--190",
abstract = "One of the major outstanding questions in computational semantics is how humans integrate the meaning of individual words into a sentence in a way that enables understanding of complex and novel combinations of words, a phenomenon known as compositionality. Many approaches to modeling the process of compositionality can be classified as either {\textquotedblleft}vector-based{\textquotedblright} models, in which the meaning of a sentence is represented as a vector of numbers, or {\textquotedblleft}syntax-based{\textquotedblright} models, in which the meaning of a sentence is represented as a structured tree of labeled components. A major barrier in assessing and comparing these contrasting approaches is the lack of large, relevant datasets for model comparison. This article aims to address this gap by introducing a new dataset, STS3k, which consists of 2,800 pairs of sentences rated for semantic similarity by human participants. The sentence pairs have been selected to systematically vary different combinations of words, providing a rigorous test and enabling a clearer picture of the comparative strengths and weaknesses of vector-based and syntax-based methods. Our results show that when tested on the new STS3k dataset, state-of-the-art transformers poorly capture the pattern of human semantic similarity judgments, while even simple methods for combining syntax- and vector-based components into a novel hybrid model yield substantial improvements. We further show that this improvement is due to the ability of the hybrid model to replicate human sensitivity to specific changes in sentence structure. Our findings provide evidence for the value of integrating multiple methods to better reflect the way in which humans mentally represent compositional meaning."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fodor-etal-2025-compositionality">
<titleInfo>
<title>Compositionality and Sentence Meaning: Comparing Semantic Parsing and Transformers on a Challenging Sentence Similarity Dataset</title>
</titleInfo>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Fodor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="given">De</namePart>
<namePart type="family">Deyne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shinsuke</namePart>
<namePart type="family">Suzuki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>One of the major outstanding questions in computational semantics is how humans integrate the meaning of individual words into a sentence in a way that enables understanding of complex and novel combinations of words, a phenomenon known as compositionality. Many approaches to modeling the process of compositionality can be classified as either “vector-based” models, in which the meaning of a sentence is represented as a vector of numbers, or “syntax-based” models, in which the meaning of a sentence is represented as a structured tree of labeled components. A major barrier in assessing and comparing these contrasting approaches is the lack of large, relevant datasets for model comparison. This article aims to address this gap by introducing a new dataset, STS3k, which consists of 2,800 pairs of sentences rated for semantic similarity by human participants. The sentence pairs have been selected to systematically vary different combinations of words, providing a rigorous test and enabling a clearer picture of the comparative strengths and weaknesses of vector-based and syntax-based methods. Our results show that when tested on the new STS3k dataset, state-of-the-art transformers poorly capture the pattern of human semantic similarity judgments, while even simple methods for combining syntax- and vector-based components into a novel hybrid model yield substantial improvements. We further show that this improvement is due to the ability of the hybrid model to replicate human sensitivity to specific changes in sentence structure. Our findings provide evidence for the value of integrating multiple methods to better reflect the way in which humans mentally represent compositional meaning.</abstract>
<identifier type="citekey">fodor-etal-2025-compositionality</identifier>
<identifier type="doi">10.1162/coli_a_00536</identifier>
<location>
<url>https://aclanthology.org/2025.cl-1.5/</url>
</location>
<part>
<date>2025-03</date>
<detail type="volume"><number>51</number></detail>
<detail type="issue"><number>1</number></detail>
<extent unit="page">
<start>139</start>
<end>190</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Compositionality and Sentence Meaning: Comparing Semantic Parsing and Transformers on a Challenging Sentence Similarity Dataset
%A Fodor, James
%A Deyne, Simon De
%A Suzuki, Shinsuke
%J Computational Linguistics
%D 2025
%8 March
%V 51
%N 1
%I MIT Press
%C Cambridge, MA
%F fodor-etal-2025-compositionality
%X One of the major outstanding questions in computational semantics is how humans integrate the meaning of individual words into a sentence in a way that enables understanding of complex and novel combinations of words, a phenomenon known as compositionality. Many approaches to modeling the process of compositionality can be classified as either “vector-based” models, in which the meaning of a sentence is represented as a vector of numbers, or “syntax-based” models, in which the meaning of a sentence is represented as a structured tree of labeled components. A major barrier in assessing and comparing these contrasting approaches is the lack of large, relevant datasets for model comparison. This article aims to address this gap by introducing a new dataset, STS3k, which consists of 2,800 pairs of sentences rated for semantic similarity by human participants. The sentence pairs have been selected to systematically vary different combinations of words, providing a rigorous test and enabling a clearer picture of the comparative strengths and weaknesses of vector-based and syntax-based methods. Our results show that when tested on the new STS3k dataset, state-of-the-art transformers poorly capture the pattern of human semantic similarity judgments, while even simple methods for combining syntax- and vector-based components into a novel hybrid model yield substantial improvements. We further show that this improvement is due to the ability of the hybrid model to replicate human sensitivity to specific changes in sentence structure. Our findings provide evidence for the value of integrating multiple methods to better reflect the way in which humans mentally represent compositional meaning.
%R 10.1162/coli_a_00536
%U https://aclanthology.org/2025.cl-1.5/
%U https://doi.org/10.1162/coli_a_00536
%P 139-190
Markdown (Informal)
[Compositionality and Sentence Meaning: Comparing Semantic Parsing and Transformers on a Challenging Sentence Similarity Dataset](https://aclanthology.org/2025.cl-1.5/) (Fodor et al., CL 2025)
ACL