BibTeX
@inproceedings{buijtelaar-pezzelle-2023-psycholinguistic,
title = "A Psycholinguistic Analysis of {BERT}{'}s Representations of Compounds",
author = "Buijtelaar, Lars and
Pezzelle, Sandro",
booktitle = "Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.eacl-main.163",
doi = "10.18653/v1/2023.eacl-main.163",
pages = "2230--2241",
abstract = "This work studies the semantic representations learned by BERT for compounds, that is, expressions such as sunlight or bodyguard. We build on recent studies that explore semantic information in Transformers at the word level and test whether BERT aligns with human semantic intuitions when dealing with expressions (e.g., sunlight) whose overall meaning depends{---}to a various extent{---}on the semantics of the constituent words (sun, light). We leverage a dataset that includes human judgments on two psycholinguistic measures of compound semantic analysis: lexeme meaning dominance (LMD; quantifying the weight of each constituent toward the compound meaning) and semantic transparency (ST; evaluating the extent to which the compound meaning is recoverable from the constituents{'} semantics). We show that BERT-based measures moderately align with human intuitions, especially when using contextualized representations, and that LMD is overall more predictable than ST. Contrary to the results reported for {`}standard{'} words, higher, more contextualized layers are the best at representing compound meaning. These findings shed new light on the abilities of BERT in dealing with fine-grained semantic phenomena. Moreover, they can provide insights into how speakers represent compounds.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="buijtelaar-pezzelle-2023-psycholinguistic">
  <titleInfo>
    <title>A Psycholinguistic Analysis of BERT’s Representations of Compounds</title>
  </titleInfo>
  <name type="personal">
    <namePart type="given">Lars</namePart>
    <namePart type="family">Buijtelaar</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Sandro</namePart>
    <namePart type="family">Pezzelle</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <originInfo>
    <dateIssued>2023-05</dateIssued>
  </originInfo>
  <typeOfResource>text</typeOfResource>
  <relatedItem type="host">
    <titleInfo>
      <title>Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics</title>
    </titleInfo>
    <originInfo>
      <publisher>Association for Computational Linguistics</publisher>
      <place>
        <placeTerm type="text">Dubrovnik, Croatia</placeTerm>
      </place>
    </originInfo>
    <genre authority="marcgt">conference publication</genre>
  </relatedItem>
  <abstract>This work studies the semantic representations learned by BERT for compounds, that is, expressions such as sunlight or bodyguard. We build on recent studies that explore semantic information in Transformers at the word level and test whether BERT aligns with human semantic intuitions when dealing with expressions (e.g., sunlight) whose overall meaning depends—to a varying extent—on the semantics of the constituent words (sun, light). We leverage a dataset that includes human judgments on two psycholinguistic measures of compound semantic analysis: lexeme meaning dominance (LMD; quantifying the weight of each constituent toward the compound meaning) and semantic transparency (ST; evaluating the extent to which the compound meaning is recoverable from the constituents’ semantics). We show that BERT-based measures moderately align with human intuitions, especially when using contextualized representations, and that LMD is overall more predictable than ST. Contrary to the results reported for ‘standard’ words, higher, more contextualized layers are the best at representing compound meaning. These findings shed new light on the abilities of BERT in dealing with fine-grained semantic phenomena. Moreover, they can provide insights into how speakers represent compounds.</abstract>
<identifier type="citekey">buijtelaar-pezzelle-2023-psycholinguistic</identifier>
<identifier type="doi">10.18653/v1/2023.eacl-main.163</identifier>
<location>
<url>https://aclanthology.org/2023.eacl-main.163</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>2230</start>
<end>2241</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T A Psycholinguistic Analysis of BERT’s Representations of Compounds
%A Buijtelaar, Lars
%A Pezzelle, Sandro
%S Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F buijtelaar-pezzelle-2023-psycholinguistic
%X This work studies the semantic representations learned by BERT for compounds, that is, expressions such as sunlight or bodyguard. We build on recent studies that explore semantic information in Transformers at the word level and test whether BERT aligns with human semantic intuitions when dealing with expressions (e.g., sunlight) whose overall meaning depends—to a varying extent—on the semantics of the constituent words (sun, light). We leverage a dataset that includes human judgments on two psycholinguistic measures of compound semantic analysis: lexeme meaning dominance (LMD; quantifying the weight of each constituent toward the compound meaning) and semantic transparency (ST; evaluating the extent to which the compound meaning is recoverable from the constituents’ semantics). We show that BERT-based measures moderately align with human intuitions, especially when using contextualized representations, and that LMD is overall more predictable than ST. Contrary to the results reported for ‘standard’ words, higher, more contextualized layers are the best at representing compound meaning. These findings shed new light on the abilities of BERT in dealing with fine-grained semantic phenomena. Moreover, they can provide insights into how speakers represent compounds.
%R 10.18653/v1/2023.eacl-main.163
%U https://aclanthology.org/2023.eacl-main.163
%U https://doi.org/10.18653/v1/2023.eacl-main.163
%P 2230-2241
Markdown (Informal)
[A Psycholinguistic Analysis of BERT’s Representations of Compounds](https://aclanthology.org/2023.eacl-main.163) (Buijtelaar & Pezzelle, EACL 2023)
ACL
Lars Buijtelaar and Sandro Pezzelle. 2023. A Psycholinguistic Analysis of BERT’s Representations of Compounds. In Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics, pages 2230–2241, Dubrovnik, Croatia. Association for Computational Linguistics.
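
For intuition about what a "BERT-based measure" of compound semantics can look like, here is a minimal, hypothetical Python sketch, not the authors' released code: it computes a crude semantic transparency (ST) proxy by comparing a compound's BERT embedding with those of its constituents. The model choice (bert-base-uncased), mean-pooling over subword tokens, the fixed layer, and the use of out-of-context rather than contextualized embeddings are all illustrative assumptions.

# Hypothetical sketch of a BERT-based semantic transparency (ST) proxy.
# Assumptions not taken from the paper: bert-base-uncased, last layer,
# mean-pooling over subwords, out-of-context embeddings.
import torch
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased", output_hidden_states=True)
model.eval()

def word_embedding(word: str, layer: int = 12) -> torch.Tensor:
    """Mean-pool the hidden states of a word's subword tokens at one layer."""
    inputs = tokenizer(word, return_tensors="pt")
    with torch.no_grad():
        hidden = model(**inputs).hidden_states[layer]  # (1, seq_len, 768)
    return hidden[0, 1:-1].mean(dim=0)  # drop [CLS]/[SEP], pool the rest

compound, modifier, head = "sunlight", "sun", "light"
cos = torch.nn.functional.cosine_similarity
emb_c = word_embedding(compound)
sim_mod = cos(emb_c, word_embedding(modifier), dim=0)
sim_head = cos(emb_c, word_embedding(head), dim=0)
# ST proxy: average closeness of the compound to both constituents;
# an LMD analogue could instead compare sim_mod against sim_head.
st_proxy = 0.5 * (sim_mod + sim_head)
print(f"ST proxy for '{compound}': {st_proxy.item():.3f}")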