@inproceedings{laureano-de-leon-etal-2025-evaluating,
title = "Evaluating Large Language Models on Multiword Expressions in Multilingual and Code-Switched Contexts",
author = "Laureano De Leon, Frances Adriana and
Abbas, Asim and
Tayyar Madabushi, Harish and
Lee, Mark",
editor = "Angelova, Galia and
Kunilovskaya, Maria and
Escribe, Marie and
Mitkov, Ruslan",
booktitle = "Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era",
month = sep,
year = "2025",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2025.ranlp-1.75/",
pages = "644--653",
abstract = "Multiword expressions, characterised by non-compositional meanings and syntactic irregularities, are an example of nuanced language. These expressions can be used literally or idiomatically, leading to significant changes in meaning. Although large language models perform well on many tasks, their ability to handle subtle linguistic phenomena remains unclear. This study examines how state-of-the-art models process the ambiguity of potentially idiomatic multiword expressions, particularly in less frequent contexts where memorisation is less likely to help. By evaluating models in Portuguese, Galician, and English, and introducing a new code-switched dataset and task, we show that large language models, despite their strengths, have difficulty handling nuanced language. In particular, we find that the latest models, including GPT-4, fail to outperform the xlm-roBERTa-base baselines in both detection and semantic tasks, with especially poor performance on the novel tasks we introduce, despite its similarity to existing tasks. Overall, our results demonstrate that multiword expressions, especially those that are ambiguous, continue to be a challenge to models. We provide open access to our datasets, prompts and model responses."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="laureano-de-leon-etal-2025-evaluating">
<titleInfo>
<title>Evaluating Large Language Models on Multiword Expressions in Multilingual and Code-Switched Contexts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Frances</namePart>
<namePart type="given">Adriana</namePart>
<namePart type="family">Laureano De Leon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asim</namePart>
<namePart type="family">Abbas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harish</namePart>
<namePart type="family">Tayyar Madabushi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
</titleInfo>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Kunilovskaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie</namePart>
<namePart type="family">Escribe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Multiword expressions, characterised by non-compositional meanings and syntactic irregularities, are an example of nuanced language. These expressions can be used literally or idiomatically, leading to significant changes in meaning. Although large language models perform well on many tasks, their ability to handle subtle linguistic phenomena remains unclear. This study examines how state-of-the-art models process the ambiguity of potentially idiomatic multiword expressions, particularly in less frequent contexts where memorisation is less likely to help. By evaluating models in Portuguese, Galician, and English, and introducing a new code-switched dataset and task, we show that large language models, despite their strengths, have difficulty handling nuanced language. In particular, we find that the latest models, including GPT-4, fail to outperform the xlm-roBERTa-base baselines in both detection and semantic tasks, with especially poor performance on the novel tasks we introduce, despite its similarity to existing tasks. Overall, our results demonstrate that multiword expressions, especially those that are ambiguous, continue to be a challenge to models. We provide open access to our datasets, prompts and model responses.</abstract>
<identifier type="citekey">laureano-de-leon-etal-2025-evaluating</identifier>
<location>
<url>https://aclanthology.org/2025.ranlp-1.75/</url>
</location>
<part>
<date>2025-09</date>
<extent unit="page">
<start>644</start>
<end>653</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Large Language Models on Multiword Expressions in Multilingual and Code-Switched Contexts
%A Laureano De Leon, Frances Adriana
%A Abbas, Asim
%A Tayyar Madabushi, Harish
%A Lee, Mark
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F laureano-de-leon-etal-2025-evaluating
%X Multiword expressions, characterised by non-compositional meanings and syntactic irregularities, are an example of nuanced language. These expressions can be used literally or idiomatically, leading to significant changes in meaning. Although large language models perform well on many tasks, their ability to handle subtle linguistic phenomena remains unclear. This study examines how state-of-the-art models process the ambiguity of potentially idiomatic multiword expressions, particularly in less frequent contexts where memorisation is less likely to help. By evaluating models in Portuguese, Galician, and English, and introducing a new code-switched dataset and task, we show that large language models, despite their strengths, have difficulty handling nuanced language. In particular, we find that the latest models, including GPT-4, fail to outperform the xlm-roBERTa-base baselines in both detection and semantic tasks, with especially poor performance on the novel tasks we introduce, despite its similarity to existing tasks. Overall, our results demonstrate that multiword expressions, especially those that are ambiguous, continue to be a challenge to models. We provide open access to our datasets, prompts and model responses.
%U https://aclanthology.org/2025.ranlp-1.75/
%P 644-653
Markdown (Informal)
[Evaluating Large Language Models on Multiword Expressions in Multilingual and Code-Switched Contexts](https://aclanthology.org/2025.ranlp-1.75/) (Laureano De Leon et al., RANLP 2025)
ACL