@inproceedings{swaminathan-cook-2023-token,
  title     = {Token-level Identification of Multiword Expressions using Pre-trained Multilingual Language Models},
  author    = {Swaminathan, Raghuraman and
               Cook, Paul},
  editor    = {Bhatia, Archna and
               Evang, Kilian and
               Garcia, Marcos and
               Giouli, Voula and
               Han, Lifeng and
               Taslimipoor, Shiva},
  booktitle = {Proceedings of the 19th Workshop on Multiword Expressions ({MWE} 2023)},
  month     = may,
  year      = {2023},
  address   = {Dubrovnik, Croatia},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.mwe-1.1},
  doi       = {10.18653/v1/2023.mwe-1.1},
  pages     = {1--6},
  abstract  = {In this paper, we consider novel cross-lingual settings for multiword expression (MWE) identification (Ramisch et al., 2020) and idiomaticity prediction (Tayyar Madabushi et al., 2022) in which systems are tested on languages that are unseen during training. Our findings indicate that pre-trained multilingual language models are able to learn knowledge about MWEs and idiomaticity that is not language-specific. Moreover, we find that training data from other languages can be leveraged to give improvements over monolingual models.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="swaminathan-cook-2023-token">
<titleInfo>
<title>Token-level Identification of Multiword Expressions using Pre-trained Multilingual Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Raghuraman</namePart>
<namePart type="family">Swaminathan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Cook</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Workshop on Multiword Expressions (MWE 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Archna</namePart>
<namePart type="family">Bhatia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kilian</namePart>
<namePart type="family">Evang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Garcia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Voula</namePart>
<namePart type="family">Giouli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lifeng</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shiva</namePart>
<namePart type="family">Taslimipoor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we consider novel cross-lingual settings for multiword expression (MWE) identification (Ramisch et al., 2020) and idiomaticity prediction (Tayyar Madabushi et al., 2022) in which systems are tested on languages that are unseen during training. Our findings indicate that pre-trained multilingual language models are able to learn knowledge about MWEs and idiomaticity that is not language-specific. Moreover, we find that training data from other languages can be leveraged to give improvements over monolingual models.</abstract>
<identifier type="citekey">swaminathan-cook-2023-token</identifier>
<identifier type="doi">10.18653/v1/2023.mwe-1.1</identifier>
<location>
<url>https://aclanthology.org/2023.mwe-1.1</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>1</start>
<end>6</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Token-level Identification of Multiword Expressions using Pre-trained Multilingual Language Models
%A Swaminathan, Raghuraman
%A Cook, Paul
%Y Bhatia, Archna
%Y Evang, Kilian
%Y Garcia, Marcos
%Y Giouli, Voula
%Y Han, Lifeng
%Y Taslimipoor, Shiva
%S Proceedings of the 19th Workshop on Multiword Expressions (MWE 2023)
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F swaminathan-cook-2023-token
%X In this paper, we consider novel cross-lingual settings for multiword expression (MWE) identification (Ramisch et al., 2020) and idiomaticity prediction (Tayyar Madabushi et al., 2022) in which systems are tested on languages that are unseen during training. Our findings indicate that pre-trained multilingual language models are able to learn knowledge about MWEs and idiomaticity that is not language-specific. Moreover, we find that training data from other languages can be leveraged to give improvements over monolingual models.
%R 10.18653/v1/2023.mwe-1.1
%U https://aclanthology.org/2023.mwe-1.1
%U https://doi.org/10.18653/v1/2023.mwe-1.1
%P 1-6
Markdown (Informal)
[Token-level Identification of Multiword Expressions using Pre-trained Multilingual Language Models](https://aclanthology.org/2023.mwe-1.1) (Swaminathan & Cook, MWE 2023)
ACL