@inproceedings{colak-2026-idiomranker,
title = "{I}diom{R}anker-{X} at {MWE}-2026 {A}d{MIR}e 2: Multilingual Idiom-Image Alignment via Low-Rank Adaptation of Cross-Encoders",
author = "Colak, Mehmet Utku",
editor = {Ojha, Atul Kr. and
Mititelu, Verginica Barbu and
Constant, Mathieu and
Stoyanova, Ivelina and
Do{\u{g}}ru{\"o}z, A. Seza and
Rademaker, Alexandre},
booktitle = "Proceedings of the 22nd Workshop on Multiword Expressions ({MWE} 2026)",
month = mar,
year = "2026",
address = "Rabat, Marocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.mwe-1.16/",
pages = "134--138",
ISBN = "979-8-89176-363-0",
abstract = "This paper describes the system submitted for the $\textbf{MWE 2026 Shared Task}$ (AdMIRe 2.0 Subtask A). The submission focused on a text-centric approach, reframing the idiom-image alignment task as a sentence-pair classification problem using $\textbf{mBERT}$ (Multilingual BERT). The submitted system relied on full fine-tuning using only the English training data, achieving a Top-1 Accuracy of approximately $\textbf{0.30}$ on the blind test set. Following the evaluation phase, significant limitations were identified in the cross-lingual generalization of the base model. In a post-evaluation study, the backbone was upgraded to $\textbf{XLM-RoBERTa-Large-XNLI}$, incorporating $\textbf{Low-Rank Adaptation (LoRA)}$ and utilizing the full multilingual dataset with hard negative mining. These improvements boosted the accuracy to $\textbf{0.41}$, demonstrating the necessity of NLI-specific pre-training and parameter-efficient tuning for MWE-aware multimodal tasks."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="colak-2026-idiomranker">
<titleInfo>
<title>IdiomRanker-X at MWE-2026 AdMIRe 2: Multilingual Idiom-Image Alignment via Low-Rank Adaptation of Cross-Encoders</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mehmet</namePart>
<namePart type="given">Utku</namePart>
<namePart type="family">Colak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 22nd Workshop on Multiword Expressions (MWE 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Verginica</namePart>
<namePart type="given">Barbu</namePart>
<namePart type="family">Mititelu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mathieu</namePart>
<namePart type="family">Constant</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivelina</namePart>
<namePart type="family">Stoyanova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">A</namePart>
<namePart type="given">Seza</namePart>
<namePart type="family">Doğruöz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandre</namePart>
<namePart type="family">Rademaker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Marocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-363-0</identifier>
</relatedItem>
<abstract>This paper describes the system submitted for the MWE 2026 Shared Task (AdMIRe 2.0 Subtask A). The submission focused on a text-centric approach, reframing the idiom-image alignment task as a sentence-pair classification problem using mBERT (Multilingual BERT). The submitted system relied on full fine-tuning using only the English training data, achieving a Top-1 Accuracy of approximately 0.30 on the blind test set. Following the evaluation phase, significant limitations were identified in the cross-lingual generalization of the base model. In a post-evaluation study, the backbone was upgraded to XLM-RoBERTa-Large-XNLI, incorporating Low-Rank Adaptation (LoRA) and utilizing the full multilingual dataset with hard negative mining. These improvements boosted the accuracy to 0.41, demonstrating the necessity of NLI-specific pre-training and parameter-efficient tuning for MWE-aware multimodal tasks.</abstract>
<identifier type="citekey">colak-2026-idiomranker</identifier>
<location>
<url>https://aclanthology.org/2026.mwe-1.16/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>134</start>
<end>138</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T IdiomRanker-X at MWE-2026 AdMIRe 2: Multilingual Idiom-Image Alignment via Low-Rank Adaptation of Cross-Encoders
%A Colak, Mehmet Utku
%Y Ojha, Atul Kr.
%Y Mititelu, Verginica Barbu
%Y Constant, Mathieu
%Y Stoyanova, Ivelina
%Y Doğruöz, A. Seza
%Y Rademaker, Alexandre
%S Proceedings of the 22nd Workshop on Multiword Expressions (MWE 2026)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-363-0
%F colak-2026-idiomranker
%X This paper describes the system submitted for the MWE 2026 Shared Task (AdMIRe 2.0 Subtask A). The submission focused on a text-centric approach, reframing the idiom-image alignment task as a sentence-pair classification problem using mBERT (Multilingual BERT). The submitted system relied on full fine-tuning using only the English training data, achieving a Top-1 Accuracy of approximately 0.30 on the blind test set. Following the evaluation phase, significant limitations were identified in the cross-lingual generalization of the base model. In a post-evaluation study, the backbone was upgraded to XLM-RoBERTa-Large-XNLI, incorporating Low-Rank Adaptation (LoRA) and utilizing the full multilingual dataset with hard negative mining. These improvements boosted the accuracy to 0.41, demonstrating the necessity of NLI-specific pre-training and parameter-efficient tuning for MWE-aware multimodal tasks.
%U https://aclanthology.org/2026.mwe-1.16/
%P 134-138
Markdown (Informal)
[IdiomRanker-X at MWE-2026 AdMIRe 2: Multilingual Idiom-Image Alignment via Low-Rank Adaptation of Cross-Encoders](https://aclanthology.org/2026.mwe-1.16/) (Colak, MWE 2026)
ACL
Mehmet Utku Colak. 2026. IdiomRanker-X at MWE-2026 AdMIRe 2: Multilingual Idiom-Image Alignment via Low-Rank Adaptation of Cross-Encoders. In Proceedings of the 22nd Workshop on Multiword Expressions (MWE 2026), pages 134–138, Rabat, Morocco. Association for Computational Linguistics.
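
The abstract describes applying Low-Rank Adaptation to an NLI-pretrained XLM-R cross-encoder that scores idiom-caption sentence pairs. Below is a minimal sketch of what such a setup might look like using the Hugging Face `transformers` and `peft` libraries; it is not the author's implementation. The checkpoint name (`joeddav/xlm-roberta-large-xnli`), the LoRA rank and target modules, and the example sentences are all illustrative assumptions.

```python
# Sketch (not the paper's code) of the post-evaluation setup the abstract
# describes: LoRA applied to an XNLI-pretrained XLM-R cross-encoder that
# scores an idiom-in-context sentence against a candidate image caption.
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from peft import LoraConfig, get_peft_model

BASE = "joeddav/xlm-roberta-large-xnli"  # assumed NLI-pretrained checkpoint

tokenizer = AutoTokenizer.from_pretrained(BASE)
model = AutoModelForSequenceClassification.from_pretrained(BASE)

# Low-Rank Adaptation: small rank-r update matrices are trained on the
# attention projections while the backbone stays frozen
# (parameter-efficient tuning, as the abstract argues).
lora_cfg = LoraConfig(
    r=16,                               # rank of the update matrices (assumed)
    lora_alpha=32,
    target_modules=["query", "value"],  # XLM-R attention projection names
    lora_dropout=0.1,
    task_type="SEQ_CLS",
)
model = get_peft_model(model, lora_cfg)
model.print_trainable_parameters()      # only the LoRA weights are trainable

# Cross-encoder scoring: idiom sentence and candidate caption as one pair.
inputs = tokenizer(
    "He finally kicked the bucket.",         # idiom in context (assumed example)
    "A man lies in a coffin at a funeral.",  # candidate image caption
    return_tensors="pt",
    truncation=True,
)
logits = model(**inputs).logits  # rank candidates by the entailment logit
                                 # (label index depends on the checkpoint)
```

In this framing, each candidate image is represented by a caption, every caption is scored against the idiomatic sentence, and Top-1 accuracy is computed over the ranking, which matches the sentence-pair reformulation the abstract describes.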