@inproceedings{rossini-plas-2026-binary,
    title = "Binary Token-Level Classification with {D}e{BERT}a for All-Type {MWE} Identification: A Lightweight Approach with Linguistic Enhancement",
    author = "Rossini, Diego and
      van der Plas, Lonneke",
    editor = "Demberg, Vera and
      Inui, Kentaro and
      M{\`a}rquez, Llu{\'i}s",
    booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {EACL} 2026",
    month = mar,
    year = "2026",
    address = "Rabat, Morocco",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.findings-eacl.135/",
    pages = "2600--2610",
    ISBN = "979-8-89176-386-9",
    abstract = "We present a comprehensive approach to multiword expression (MWE) identification that combines binary token-level classification, linguistic feature integration, and data augmentation. Our DeBERTa-v3-large model achieves 69.8{\%} F1 on the CoAM dataset, surpassing the previous best result on this dataset (Qwen-72B, 57.8{\%} F1) by 12 points while using 165 times fewer parameters. We achieve this performance by (1) reformulating detection as binary token-level START/END/INSIDE classification rather than span-based prediction, (2) incorporating NP chunking and dependency features that aid the identification of discontinuous and NOUN-type MWEs, and (3) applying oversampling to address severe class imbalance in the training data. We confirm the generalization of our method on the STREUSLE dataset, achieving 78.9{\%} F1. These results demonstrate that carefully designed smaller models can substantially outperform LLMs on structured NLP tasks, with important implications for resource-constrained deployments."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="rossini-plas-2026-binary">
    <titleInfo>
      <title>Binary Token-Level Classification with DeBERTa for All-Type MWE Identification: A Lightweight Approach with Linguistic Enhancement</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Diego</namePart>
      <namePart type="family">Rossini</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lonneke</namePart>
      <namePart type="family">van der Plas</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: EACL 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Vera</namePart>
        <namePart type="family">Demberg</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kentaro</namePart>
        <namePart type="family">Inui</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lluís</namePart>
        <namePart type="family">Màrquez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-386-9</identifier>
    </relatedItem>
    <abstract>We present a comprehensive approach to multiword expression (MWE) identification that combines binary token-level classification, linguistic feature integration, and data augmentation. Our DeBERTa-v3-large model achieves 69.8% F1 on the CoAM dataset, surpassing the previous best result on this dataset (Qwen-72B, 57.8% F1) by 12 points while using 165 times fewer parameters. We achieve this performance by (1) reformulating detection as binary token-level START/END/INSIDE classification rather than span-based prediction, (2) incorporating NP chunking and dependency features that aid the identification of discontinuous and NOUN-type MWEs, and (3) applying oversampling to address severe class imbalance in the training data. We confirm the generalization of our method on the STREUSLE dataset, achieving 78.9% F1. These results demonstrate that carefully designed smaller models can substantially outperform LLMs on structured NLP tasks, with important implications for resource-constrained deployments.</abstract>
    <identifier type="citekey">rossini-plas-2026-binary</identifier>
    <location>
      <url>https://aclanthology.org/2026.findings-eacl.135/</url>
    </location>
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>2600</start>
        <end>2610</end>
      </extent>
    </part>
  </mods>
</modsCollection>

%0 Conference Proceedings
%T Binary Token-Level Classification with DeBERTa for All-Type MWE Identification: A Lightweight Approach with Linguistic Enhancement
%A Rossini, Diego
%A van der Plas, Lonneke
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Màrquez, Lluís
%S Findings of the Association for Computational Linguistics: EACL 2026
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-386-9
%F rossini-plas-2026-binary
%X We present a comprehensive approach to multiword expression (MWE) identification that combines binary token-level classification, linguistic feature integration, and data augmentation. Our DeBERTa-v3-large model achieves 69.8% F1 on the CoAM dataset, surpassing the previous best result on this dataset (Qwen-72B, 57.8% F1) by 12 points while using 165 times fewer parameters. We achieve this performance by (1) reformulating detection as binary token-level START/END/INSIDE classification rather than span-based prediction, (2) incorporating NP chunking and dependency features that aid the identification of discontinuous and NOUN-type MWEs, and (3) applying oversampling to address severe class imbalance in the training data. We confirm the generalization of our method on the STREUSLE dataset, achieving 78.9% F1. These results demonstrate that carefully designed smaller models can substantially outperform LLMs on structured NLP tasks, with important implications for resource-constrained deployments.
%U https://aclanthology.org/2026.findings-eacl.135/
%P 2600-2610

Markdown (Informal)
[Binary Token-Level Classification with DeBERTa for All-Type MWE Identification: A Lightweight Approach with Linguistic Enhancement](https://aclanthology.org/2026.findings-eacl.135/) (Rossini & van der Plas, Findings 2026)

ACL
Diego Rossini and Lonneke van der Plas. 2026. [Binary Token-Level Classification with DeBERTa for All-Type MWE Identification: A Lightweight Approach with Linguistic Enhancement](https://aclanthology.org/2026.findings-eacl.135/). In *Findings of the Association for Computational Linguistics: EACL 2026*, pages 2600–2610, Rabat, Morocco. Association for Computational Linguistics.