@inproceedings{moise-nisioi-2026-morphofiltered,
title = "{M}orpho{F}iltered-Gemini at {MWE}-2026 {PARSEME} 2.0 Subtask 1: Tackling {LLM} Overgeneration via Universal {POS}-based Constraints",
author = "Moise, Irina and
Nisioi, Sergiu",
editor = {Ojha, Atul Kr. and
Mititelu, Verginica Barbu and
Constant, Mathieu and
Stoyanova, Ivelina and
Do{\u{g}}ru{\"o}z, A. Seza and
Rademaker, Alexandre},
booktitle = "Proceedings of the 22nd Workshop on Multiword Expressions ({MWE} 2026)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.mwe-1.26/",
pages = "196--202",
ISBN = "979-8-89176-363-0",
abstract = "This paper describes MorphoFiltered-Gemini, a multilingual system submitted to the PARSEME 2.0 shared task on multiword expression (MWE) identification. The system relies on Google Gemini 2.0 Flash-Lite to generate MWE predictions using zero-shot and selectively applied few-shot prompting, without fine-tuning or language-specific resources. To reduce the tendency of large language models to over-generate MWEs, we introduce a lightweight morphological post-filter that removes unlikely constructions while preserving high-precision patterns. Rather than optimizing peak performance for individual languages, our approach prioritizes precision and cross-lingual robustness. As a result, the system exhibits stable behavior across 17 typologically diverse languages and achieves the highest Shannon evenness score among all submitted systems. The experimental results highlight a clear trade-off between recall-oriented LLM prompting strategies and precision-oriented filtering, and show that simple linguistic constraints can effectively improve the stability of LLM-based multilingual MWE identification systems."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="moise-nisioi-2026-morphofiltered">
<titleInfo>
<title>MorphoFiltered-Gemini at MWE-2026 PARSEME 2.0 Subtask 1: Tackling LLM Overgeneration via Universal POS-based Constraints</title>
</titleInfo>
<name type="personal">
<namePart type="given">Irina</namePart>
<namePart type="family">Moise</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sergiu</namePart>
<namePart type="family">Nisioi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Workshop on Multiword Expressions (MWE 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Verginica</namePart>
<namePart type="given">Barbu</namePart>
<namePart type="family">Mititelu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mathieu</namePart>
<namePart type="family">Constant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivelina</namePart>
<namePart type="family">Stoyanova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A.</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandre</namePart>
<namePart type="family">Rademaker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-363-0</identifier>
</relatedItem>
<abstract>This paper describes MorphoFiltered-Gemini, a multilingual system submitted to the PARSEME 2.0 shared task on multiword expression (MWE) identification. The system relies on Google Gemini 2.0 Flash-Lite to generate MWE predictions using zero-shot and selectively applied few-shot prompting, without fine-tuning or language-specific resources. To reduce the tendency of large language models to over-generate MWEs, we introduce a lightweight morphological post-filter that removes unlikely constructions while preserving high-precision patterns. Rather than optimizing peak performance for individual languages, our approach prioritizes precision and cross-lingual robustness. As a result, the system exhibits stable behavior across 17 typologically diverse languages and achieves the highest Shannon evenness score among all submitted systems. The experimental results highlight a clear trade-off between recall-oriented LLM prompting strategies and precision-oriented filtering, and show that simple linguistic constraints can effectively improve the stability of LLM-based multilingual MWE identification systems.</abstract>
<identifier type="citekey">moise-nisioi-2026-morphofiltered</identifier>
<location>
<url>https://aclanthology.org/2026.mwe-1.26/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>196</start>
<end>202</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MorphoFiltered-Gemini at MWE-2026 PARSEME 2.0 Subtask 1: Tackling LLM Overgeneration via Universal POS-based Constraints
%A Moise, Irina
%A Nisioi, Sergiu
%Y Ojha, Atul Kr.
%Y Mititelu, Verginica Barbu
%Y Constant, Mathieu
%Y Stoyanova, Ivelina
%Y Doğruöz, A. Seza
%Y Rademaker, Alexandre
%S Proceedings of the 22nd Workshop on Multiword Expressions (MWE 2026)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-363-0
%F moise-nisioi-2026-morphofiltered
%X This paper describes MorphoFiltered-Gemini, a multilingual system submitted to the PARSEME 2.0 shared task on multiword expression (MWE) identification. The system relies on Google Gemini 2.0 Flash-Lite to generate MWE predictions using zero-shot and selectively applied few-shot prompting, without fine-tuning or language-specific resources. To reduce the tendency of large language models to over-generate MWEs, we introduce a lightweight morphological post-filter that removes unlikely constructions while preserving high-precision patterns. Rather than optimizing peak performance for individual languages, our approach prioritizes precision and cross-lingual robustness. As a result, the system exhibits stable behavior across 17 typologically diverse languages and achieves the highest Shannon evenness score among all submitted systems. The experimental results highlight a clear trade-off between recall-oriented LLM prompting strategies and precision-oriented filtering, and show that simple linguistic constraints can effectively improve the stability of LLM-based multilingual MWE identification systems.
%U https://aclanthology.org/2026.mwe-1.26/
%P 196-202
Markdown (Informal)
[MorphoFiltered-Gemini at MWE-2026 PARSEME 2.0 Subtask 1: Tackling LLM Overgeneration via Universal POS-based Constraints](https://aclanthology.org/2026.mwe-1.26/) (Moise & Nisioi, MWE 2026)
ACL