@inproceedings{yeginbergen-etal-2025-hitz,
title = "{H}i{TZ}-Ixa at {S}em{E}val-2025 Task 1: Multimodal Idiomatic Language Understanding",
author = "Yeginbergen, Anar and
Sanchez-Bayona, Elisa and
Jaunarena, Andrea and
Salaberria, Ander",
editor = "Rosenthal, Sara and
Ros{\'a}, Aiala and
Ghosh, Debanjan and
Zampieri, Marcos",
booktitle = "Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.semeval-1.293/",
pages = "2256--2261",
ISBN = "979-8-89176-273-2",
abstract = "In this paper, we present our approach to the AdMIRe (Advancing Multimodal Idiomaticity Representation) shared task, outlining the methodologies and strategies employed to tackle the challenges of idiomatic expressions in multimodal contexts. We discuss both successful and unsuccessful approaches, including the use of models of varying sizes and experiments involving zero- and few-shot learning. Our final submission, based on a zero-shot instruction-following vision-and-language model (VLM), achieved 13th place for the English test set and 1st place for the Portuguese test set on the preliminary leaderboard. We investigate the performance of open VLMs in this task, demonstrating that both large language models (LLMs) and VLMs exhibit strong capabilities in identifying idiomatic expressions. However, we also identify significant limitations in both model types, including instability and a tendency to generate hallucinated content, which raises concerns about their reliability in interpreting figurative language. Our findings emphasize the need for further advancements in multimodal models to improve their robustness and mitigate these issues."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yeginbergen-etal-2025-hitz">
<titleInfo>
<title>HiTZ-Ixa at SemEval-2025 Task 1: Multimodal Idiomatic Language Understanding</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anar</namePart>
<namePart type="family">Yeginbergen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elisa</namePart>
<namePart type="family">Sanchez-Bayona</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrea</namePart>
<namePart type="family">Jaunarena</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ander</namePart>
<namePart type="family">Salaberria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debanjan</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-273-2</identifier>
</relatedItem>
<abstract>In this paper, we present our approach to the AdMIRe (Advancing Multimodal Idiomaticity Representation) shared task, outlining the methodologies and strategies employed to tackle the challenges of idiomatic expressions in multimodal contexts. We discuss both successful and unsuccessful approaches, including the use of models of varying sizes and experiments involving zero- and few-shot learning. Our final submission, based on a zero-shot instruction-following vision-and-language model (VLM), achieved 13th place for the English test set and 1st place for the Portuguese test set on the preliminary leaderboard. We investigate the performance of open VLMs in this task, demonstrating that both large language models (LLMs) and VLMs exhibit strong capabilities in identifying idiomatic expressions. However, we also identify significant limitations in both model types, including instability and a tendency to generate hallucinated content, which raises concerns about their reliability in interpreting figurative language. Our findings emphasize the need for further advancements in multimodal models to improve their robustness and mitigate these issues.</abstract>
<identifier type="citekey">yeginbergen-etal-2025-hitz</identifier>
<location>
<url>https://aclanthology.org/2025.semeval-1.293/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>2256</start>
<end>2261</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T HiTZ-Ixa at SemEval-2025 Task 1: Multimodal Idiomatic Language Understanding
%A Yeginbergen, Anar
%A Sanchez-Bayona, Elisa
%A Jaunarena, Andrea
%A Salaberria, Ander
%Y Rosenthal, Sara
%Y Rosá, Aiala
%Y Ghosh, Debanjan
%Y Zampieri, Marcos
%S Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-273-2
%F yeginbergen-etal-2025-hitz
%X In this paper, we present our approach to the AdMIRe (Advancing Multimodal Idiomaticity Representation) shared task, outlining the methodologies and strategies employed to tackle the challenges of idiomatic expressions in multimodal contexts. We discuss both successful and unsuccessful approaches, including the use of models of varying sizes and experiments involving zero- and few-shot learning. Our final submission, based on a zero-shot instruction-following vision-and-language model (VLM), achieved 13th place for the English test set and 1st place for the Portuguese test set on the preliminary leaderboard. We investigate the performance of open VLMs in this task, demonstrating that both large language models (LLMs) and VLMs exhibit strong capabilities in identifying idiomatic expressions. However, we also identify significant limitations in both model types, including instability and a tendency to generate hallucinated content, which raises concerns about their reliability in interpreting figurative language. Our findings emphasize the need for further advancements in multimodal models to improve their robustness and mitigate these issues.
%U https://aclanthology.org/2025.semeval-1.293/
%P 2256-2261
Markdown (Informal)
[HiTZ-Ixa at SemEval-2025 Task 1: Multimodal Idiomatic Language Understanding](https://aclanthology.org/2025.semeval-1.293/) (Yeginbergen et al., SemEval 2025)
ACL