@inproceedings{takahashi-etal-2022-leveraging,
title = "Leveraging Three Types of Embeddings from Masked Language Models in Idiom Token Classification",
author = "Takahashi, Ryosuke and
Sasano, Ryohei and
Takeda, Koichi",
editor = "Nastase, Vivi and
Pavlick, Ellie and
Pilehvar, Mohammad Taher and
Camacho-Collados, Jose and
Raganato, Alessandro",
booktitle = "Proceedings of the 11th Joint Conference on Lexical and Computational Semantics",
month = jul,
year = "2022",
address = "Seattle, Washington",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.starsem-1.21",
doi = "10.18653/v1/2022.starsem-1.21",
pages = "234--239",
abstract = "Many linguistic expressions have idiomatic and literal interpretations, and the automatic distinction of these two interpretations has been studied for decades. Recent research has shown that contextualized word embeddings derived from masked language models (MLMs) can give promising results for idiom token classification. This indicates that contextualized word embedding alone contains information about whether the word is being used in a literal sense or not. However, we believe that more types of information can be derived from MLMs and that leveraging such information can improve idiom token classification. In this paper, we leverage three types of embeddings from MLMs; uncontextualized token embeddings and masked token embeddings in addition to the standard contextualized word embeddings and show that the newly added embeddings significantly improve idiom token classification for both English and Japanese datasets.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="takahashi-etal-2022-leveraging">
<titleInfo>
<title>Leveraging Three Types of Embeddings from Masked Language Models in Idiom Token Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ryosuke</namePart>
<namePart type="family">Takahashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryohei</namePart>
<namePart type="family">Sasano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Koichi</namePart>
<namePart type="family">Takeda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 11th Joint Conference on Lexical and Computational Semantics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vivi</namePart>
<namePart type="family">Nastase</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ellie</namePart>
<namePart type="family">Pavlick</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jose</namePart>
<namePart type="family">Camacho-Collados</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Raganato</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Seattle, Washington</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Many linguistic expressions have idiomatic and literal interpretations, and the automatic distinction of these two interpretations has been studied for decades. Recent research has shown that contextualized word embeddings derived from masked language models (MLMs) can give promising results for idiom token classification. This indicates that contextualized word embedding alone contains information about whether the word is being used in a literal sense or not. However, we believe that more types of information can be derived from MLMs and that leveraging such information can improve idiom token classification. In this paper, we leverage three types of embeddings from MLMs; uncontextualized token embeddings and masked token embeddings in addition to the standard contextualized word embeddings and show that the newly added embeddings significantly improve idiom token classification for both English and Japanese datasets.</abstract>
<identifier type="citekey">takahashi-etal-2022-leveraging</identifier>
<identifier type="doi">10.18653/v1/2022.starsem-1.21</identifier>
<location>
<url>https://aclanthology.org/2022.starsem-1.21</url>
</location>
<part>
<date>2022-07</date>
<extent unit="page">
<start>234</start>
<end>239</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Leveraging Three Types of Embeddings from Masked Language Models in Idiom Token Classification
%A Takahashi, Ryosuke
%A Sasano, Ryohei
%A Takeda, Koichi
%Y Nastase, Vivi
%Y Pavlick, Ellie
%Y Pilehvar, Mohammad Taher
%Y Camacho-Collados, Jose
%Y Raganato, Alessandro
%S Proceedings of the 11th Joint Conference on Lexical and Computational Semantics
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, Washington
%F takahashi-etal-2022-leveraging
%X Many linguistic expressions have idiomatic and literal interpretations, and the automatic distinction of these two interpretations has been studied for decades. Recent research has shown that contextualized word embeddings derived from masked language models (MLMs) can give promising results for idiom token classification. This indicates that contextualized word embedding alone contains information about whether the word is being used in a literal sense or not. However, we believe that more types of information can be derived from MLMs and that leveraging such information can improve idiom token classification. In this paper, we leverage three types of embeddings from MLMs; uncontextualized token embeddings and masked token embeddings in addition to the standard contextualized word embeddings and show that the newly added embeddings significantly improve idiom token classification for both English and Japanese datasets.
%R 10.18653/v1/2022.starsem-1.21
%U https://aclanthology.org/2022.starsem-1.21
%U https://doi.org/10.18653/v1/2022.starsem-1.21
%P 234-239
Markdown (Informal)
[Leveraging Three Types of Embeddings from Masked Language Models in Idiom Token Classification](https://aclanthology.org/2022.starsem-1.21) (Takahashi et al., *SEM 2022)
ACL