@article{gari-soler-apidianaki-2021-lets,
    author    = {Gar{\'\i} Soler, Aina and
                 Apidianaki, Marianna},
    title     = {Let{'}s Play Mono-Poly: {BERT} Can Reveal Words{'} Polysemy Level and Partitionability into Senses},
    editor    = {Roark, Brian and
                 Nenkova, Ani},
    journal   = {Transactions of the Association for Computational Linguistics},
    volume    = {9},
    pages     = {825--844},
    year      = {2021},
    publisher = {MIT Press},
    address   = {Cambridge, MA},
    doi       = {10.1162/tacl_a_00400},
    url       = {https://aclanthology.org/2021.tacl-1.50},
    abstract  = {Pre-trained language models (LMs) encode rich information about linguistic structure but their knowledge about lexical polysemy remains unclear. We propose a novel experimental setup for analyzing this knowledge in LMs specifically trained for different languages (English, French, Spanish, and Greek) and in multilingual BERT. We perform our analysis on datasets carefully designed to reflect different sense distributions, and control for parameters that are highly correlated with polysemy such as frequency and grammatical category. We demonstrate that BERT-derived representations reflect words{'} polysemy level and their partitionability into senses. Polysemy-related information is more clearly present in English BERT embeddings, but models in other languages also manage to establish relevant distinctions between words at different polysemy levels. Our results contribute to a better understanding of the knowledge encoded in contextualized representations and open up new avenues for multilingual lexical semantics research.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="gari-soler-apidianaki-2021-lets">
    <!-- Article title -->
    <titleInfo>
      <title>Let’s Play Mono-Poly: BERT Can Reveal Words’ Polysemy Level and Partitionability into Senses</title>
    </titleInfo>
    <!-- Authors, in byline order -->
    <name type="personal">
      <namePart type="given">Aina</namePart>
      <namePart type="family">Garí Soler</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marianna</namePart>
      <namePart type="family">Apidianaki</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <genre authority="bibutilsgt">journal article</genre>
    <!-- Host journal in which the article appears -->
    <relatedItem type="host">
      <titleInfo>
        <title>Transactions of the Association for Computational Linguistics</title>
      </titleInfo>
      <originInfo>
        <issuance>continuing</issuance>
        <publisher>MIT Press</publisher>
        <place>
          <placeTerm type="text">Cambridge, MA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">periodical</genre>
      <genre authority="bibutilsgt">academic journal</genre>
    </relatedItem>
    <abstract>Pre-trained language models (LMs) encode rich information about linguistic structure but their knowledge about lexical polysemy remains unclear. We propose a novel experimental setup for analyzing this knowledge in LMs specifically trained for different languages (English, French, Spanish, and Greek) and in multilingual BERT. We perform our analysis on datasets carefully designed to reflect different sense distributions, and control for parameters that are highly correlated with polysemy such as frequency and grammatical category. We demonstrate that BERT-derived representations reflect words’ polysemy level and their partitionability into senses. Polysemy-related information is more clearly present in English BERT embeddings, but models in other languages also manage to establish relevant distinctions between words at different polysemy levels. Our results contribute to a better understanding of the knowledge encoded in contextualized representations and open up new avenues for multilingual lexical semantics research.</abstract>
    <!-- Identifiers and landing page -->
    <identifier type="citekey">gari-soler-apidianaki-2021-lets</identifier>
    <identifier type="doi">10.1162/tacl_a_00400</identifier>
    <location>
      <url>https://aclanthology.org/2021.tacl-1.50</url>
    </location>
    <!-- Volume and page range within the host journal -->
    <part>
      <date>2021</date>
      <detail type="volume">
        <number>9</number>
      </detail>
      <extent unit="page">
        <start>825</start>
        <end>844</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Journal Article
%T Let’s Play Mono-Poly: BERT Can Reveal Words’ Polysemy Level and Partitionability into Senses
%A Garí Soler, Aina
%A Apidianaki, Marianna
%J Transactions of the Association for Computational Linguistics
%D 2021
%V 9
%I MIT Press
%C Cambridge, MA
%F gari-soler-apidianaki-2021-lets
%X Pre-trained language models (LMs) encode rich information about linguistic structure but their knowledge about lexical polysemy remains unclear. We propose a novel experimental setup for analyzing this knowledge in LMs specifically trained for different languages (English, French, Spanish, and Greek) and in multilingual BERT. We perform our analysis on datasets carefully designed to reflect different sense distributions, and control for parameters that are highly correlated with polysemy such as frequency and grammatical category. We demonstrate that BERT-derived representations reflect words’ polysemy level and their partitionability into senses. Polysemy-related information is more clearly present in English BERT embeddings, but models in other languages also manage to establish relevant distinctions between words at different polysemy levels. Our results contribute to a better understanding of the knowledge encoded in contextualized representations and open up new avenues for multilingual lexical semantics research.
%R 10.1162/tacl_a_00400
%U https://aclanthology.org/2021.tacl-1.50
%U https://doi.org/10.1162/tacl_a_00400
%P 825-844
Markdown (Informal)
[Let’s Play Mono-Poly: BERT Can Reveal Words’ Polysemy Level and Partitionability into Senses](https://aclanthology.org/2021.tacl-1.50) (Garí Soler & Apidianaki, TACL 2021)
ACL