@inproceedings{bogdanova-bucur-2026-pmi,
title = "{PMI} {MWE} Scorer at {PARSEME} 2.0 Subtask 1: identifying multi-word expressions using pointwise mutual information and universal dependencies",
author = "Bogdanova, Anna and
Bucur, Ileana",
editor = {Ojha, Atul Kr. and
Mititelu, Verginica Barbu and
Constant, Mathieu and
Stoyanova, Ivelina and
Do{\u{g}}ru{\"o}z, A. Seza and
Rademaker, Alexandre},
booktitle = "Proceedings of the 22nd Workshop on Multiword Expressions ({MWE} 2026)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.mwe-1.22/",
pages = "165--169",
ISBN = "979-8-89176-363-0",
abstract = "Multi-word expressions (MWEs) remain a challenge for NLP systems due to their syntactic variability and non-compositional semantics, that is why this issue was proposed as shared task within Unidive organization. With increasing popularity of large language models (LLM) it is important to continue researching alternative solutions. One of classical approaches for identifying MWEs is calculating pointwise mutual information (PMI), but this is a purely statistical approach that cannot unveil the links between words in natural text. To fix this issue we propose this paper with a simple syntax-aware PMI method that leverages Universal Dependency (UD) trees (Nivre et al., 2016) to model co-occurrence between syntactically related words. By computing PMI over dependency-linked word pairs and aggregating these scores, we aim to improve surface-based methods. Opposed to expectations, our experiment shows that classical statistical approach gets better results in identifying MWEs partially. Still, this approach is aimed to find a balance between lightweight calculations as opposed to LLMs and precision in results."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bogdanova-bucur-2026-pmi">
<titleInfo>
<title>PMI MWE Scorer at PARSEME 2.0 Subtask 1: identifying multi-word expressions using pointwise mutual information and universal dependencies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Bogdanova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ileana</namePart>
<namePart type="family">Bucur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Workshop on Multiword Expressions (MWE 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Verginica</namePart>
<namePart type="given">Barbu</namePart>
<namePart type="family">Mititelu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mathieu</namePart>
<namePart type="family">Constant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivelina</namePart>
<namePart type="family">Stoyanova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandre</namePart>
<namePart type="family">Rademaker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-363-0</identifier>
</relatedItem>
<abstract>Multi-word expressions (MWEs) remain a challenge for NLP systems due to their syntactic variability and non-compositional semantics, that is why this issue was proposed as shared task within Unidive organization. With increasing popularity of large language models (LLM) it is important to continue researching alternative solutions. One of classical approaches for identifying MWEs is calculating pointwise mutual information (PMI), but this is a purely statistical approach that cannot unveil the links between words in natural text. To fix this issue we propose this paper with a simple syntax-aware PMI method that leverages Universal Dependency (UD) trees (Nivre et al., 2016) to model co-occurrence between syntactically related words. By computing PMI over dependency-linked word pairs and aggregating these scores, we aim to improve surface-based methods. Opposed to expectations, our experiment shows that classical statistical approach gets better results in identifying MWEs partially. Still, this approach is aimed to find a balance between lightweight calculations as opposed to LLMs and precision in results.</abstract>
<identifier type="citekey">bogdanova-bucur-2026-pmi</identifier>
<location>
<url>https://aclanthology.org/2026.mwe-1.22/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>165</start>
<end>169</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T PMI MWE Scorer at PARSEME 2.0 Subtask 1: identifying multi-word expressions using pointwise mutual information and universal dependencies
%A Bogdanova, Anna
%A Bucur, Ileana
%Y Ojha, Atul Kr.
%Y Mititelu, Verginica Barbu
%Y Constant, Mathieu
%Y Stoyanova, Ivelina
%Y Doğruöz, A. Seza
%Y Rademaker, Alexandre
%S Proceedings of the 22nd Workshop on Multiword Expressions (MWE 2026)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-363-0
%F bogdanova-bucur-2026-pmi
%X Multi-word expressions (MWEs) remain a challenge for NLP systems due to their syntactic variability and non-compositional semantics, that is why this issue was proposed as shared task within Unidive organization. With increasing popularity of large language models (LLM) it is important to continue researching alternative solutions. One of classical approaches for identifying MWEs is calculating pointwise mutual information (PMI), but this is a purely statistical approach that cannot unveil the links between words in natural text. To fix this issue we propose this paper with a simple syntax-aware PMI method that leverages Universal Dependency (UD) trees (Nivre et al., 2016) to model co-occurrence between syntactically related words. By computing PMI over dependency-linked word pairs and aggregating these scores, we aim to improve surface-based methods. Opposed to expectations, our experiment shows that classical statistical approach gets better results in identifying MWEs partially. Still, this approach is aimed to find a balance between lightweight calculations as opposed to LLMs and precision in results.
%U https://aclanthology.org/2026.mwe-1.22/
%P 165-169
Markdown (Informal)
[PMI MWE Scorer at PARSEME 2.0 Subtask 1: identifying multi-word expressions using pointwise mutual information and universal dependencies](https://aclanthology.org/2026.mwe-1.22/) (Bogdanova & Bucur, MWE 2026)
ACL