@inproceedings{deletombe-etal-2026-diversity,
title = "Diversity patterns run deep: Impact of diversity intake on multiword expression identification",
author = "Deletombe, Mathilde and
Scholivet, Manon and
Est{\`e}ve, Louis and
Lavergne, Thomas and
Savary, Agata",
editor = {Ojha, Atul Kr. and
Mititelu, Verginica Barbu and
Constant, Mathieu and
Stoyanova, Ivelina and
Do{\u{g}}ru{\"o}z, A. Seza and
Rademaker, Alexandre},
booktitle = "Proceedings of the 22nd Workshop on Multiword Expressions ({MWE} 2026)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.mwe-1.13/",
pages = "110--116",
ISBN = "979-8-89176-363-0",
abstract = "Multiword expressions (MWEs) are good examples of a phenomenon where identification systems struggle with generalisation: MWE present in the test set but absent in the training set are rarely identified. This raises the question of the diversity of the test set, relative to that of the train set, and how this impacts performance. We set out to measure how much diversity of a train corpus increases when adding individual MWEs from the test corpus, and how this increase impacts MWE identification performance. We measure diversity across a three-dimension framework and find mostly consistent negative correlations with performance in 14 languages and 8 systems."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="deletombe-etal-2026-diversity">
<titleInfo>
<title>Diversity patterns run deep: Impact of diversity intake on multiword expression identification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mathilde</namePart>
<namePart type="family">Deletombe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manon</namePart>
<namePart type="family">Scholivet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Louis</namePart>
<namePart type="family">Estève</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Lavergne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Agata</namePart>
<namePart type="family">Savary</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Workshop on Multiword Expressions (MWE 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Verginica</namePart>
<namePart type="given">Barbu</namePart>
<namePart type="family">Mititelu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mathieu</namePart>
<namePart type="family">Constant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivelina</namePart>
<namePart type="family">Stoyanova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A.</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandre</namePart>
<namePart type="family">Rademaker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-363-0</identifier>
</relatedItem>
<abstract>Multiword expressions (MWEs) are good examples of a phenomenon where identification systems struggle with generalisation: MWE present in the test set but absent in the training set are rarely identified. This raises the question of the diversity of the test set, relative to that of the train set, and how this impacts performance. We set out to measure how much diversity of a train corpus increases when adding individual MWEs from the test corpus, and how this increase impacts MWE identification performance. We measure diversity across a three-dimension framework and find mostly consistent negative correlations with performance in 14 languages and 8 systems.</abstract>
<identifier type="citekey">deletombe-etal-2026-diversity</identifier>
<location>
<url>https://aclanthology.org/2026.mwe-1.13/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>110</start>
<end>116</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Diversity patterns run deep: Impact of diversity intake on multiword expression identification
%A Deletombe, Mathilde
%A Scholivet, Manon
%A Estève, Louis
%A Lavergne, Thomas
%A Savary, Agata
%Y Ojha, Atul Kr.
%Y Mititelu, Verginica Barbu
%Y Constant, Mathieu
%Y Stoyanova, Ivelina
%Y Doğruöz, A. Seza
%Y Rademaker, Alexandre
%S Proceedings of the 22nd Workshop on Multiword Expressions (MWE 2026)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-363-0
%F deletombe-etal-2026-diversity
%X Multiword expressions (MWEs) are good examples of a phenomenon where identification systems struggle with generalisation: MWE present in the test set but absent in the training set are rarely identified. This raises the question of the diversity of the test set, relative to that of the train set, and how this impacts performance. We set out to measure how much diversity of a train corpus increases when adding individual MWEs from the test corpus, and how this increase impacts MWE identification performance. We measure diversity across a three-dimension framework and find mostly consistent negative correlations with performance in 14 languages and 8 systems.
%U https://aclanthology.org/2026.mwe-1.13/
%P 110-116
Markdown (Informal)
[Diversity patterns run deep: Impact of diversity intake on multiword expression identification](https://aclanthology.org/2026.mwe-1.13/) (Deletombe et al., MWE 2026)
ACL