% Conference paper entry (CLiC-it 2024 proceedings), cite key ravelli-etal-2024-macid.
% Acronym-bearing words in the title are brace-protected as whole words so that
% sentence-casing styles keep their capitals.
@inproceedings{ravelli-etal-2024-macid,
  title     = {{MACID} - Multimodal {ACtion} {IDentification}: A {CALAMITA} Challenge},
  author    = {Ravelli, Andrea Amelio and
               Varvara, Rossella and
               Gregori, Lorenzo},
  editor    = {Dell'Orletta, Felice and
               Lenci, Alessandro and
               Montemagni, Simonetta and
               Sprugnoli, Rachele},
  booktitle = {Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)},
  month     = dec,
  year      = {2024},
  address   = {Pisa, Italy},
  publisher = {CEUR Workshop Proceedings},
  url       = {https://aclanthology.org/2024.clicit-1.137/},
  pages     = {1234--1238},
  isbn      = {979-12-210-7060-6},
  abstract  = {This paper presents the Multimodal ACtion IDentification challenge (MACID), part of the first CALAMITA competition. The objective of this task is to evaluate the ability of large language models (LLMs) to differentiate between closely related action concepts based on textual descriptions alone. The challenge is inspired by the {\textquotedblleft}find the intruder{\textquotedblright} task, where models must identify an outlier among a set of 4 sentences that describe similar yet distinct actions. The dataset highlights action-predicate mismatches, where the same verb may describe different actions or different verbs may refer to the same action. Although currently mono-modal (text-only), the task is designed for future multimodal integration, linking visual and textual representations to enhance action recognition. By probing a model's capacity to resolve subtle linguistic ambiguities, the challenge underscores the need for deeper cognitive understanding in action-language alignment, ultimately testing the boundaries of LLMs' ability to interpret action verbs and their associated concepts.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey ravelli-etal-2024-macid.
     The paper's own metadata (title, authors, abstract, pages) sits directly
     under <mods>; the host proceedings volume (editors, publisher, place,
     ISBN) is described inside <relatedItem type="host">. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="ravelli-etal-2024-macid">
    <titleInfo>
      <title>MACID - Multimodal ACtion IDentification: A CALAMITA Challenge</title>
    </titleInfo>
    <!-- Authors, in byline order -->
    <name type="personal">
      <namePart type="given">Andrea</namePart>
      <namePart type="given">Amelio</namePart>
      <namePart type="family">Ravelli</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Rossella</namePart>
      <namePart type="family">Varvara</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lorenzo</namePart>
      <namePart type="family">Gregori</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume containing the paper -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Felice</namePart>
        <namePart type="family">Dell’Orletta</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Simonetta</namePart>
        <namePart type="family">Montemagni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Rachele</namePart>
        <namePart type="family">Sprugnoli</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>CEUR Workshop Proceedings</publisher>
        <place>
          <placeTerm type="text">Pisa, Italy</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-12-210-7060-6</identifier>
    </relatedItem>
    <abstract>This paper presents the Multimodal ACtion IDentification challenge (MACID), part of the first CALAMITA competition. The objective of this task is to evaluate the ability of large language models (LLMs) to differentiate between closely related action concepts based on textual descriptions alone. The challenge is inspired by the “find the intruder” task, where models must identify an outlier among a set of 4 sentences that describe similar yet distinct actions. The dataset highlights action-predicate mismatches, where the same verb may describe different actions or different verbs may refer to the same action. Although currently mono-modal (text-only), the task is designed for future multimodal integration, linking visual and textual representations to enhance action recognition. By probing a model’s capacity to resolve subtle linguistic ambiguities, the challenge underscores the need for deeper cognitive understanding in action-language alignment, ultimately testing the boundaries of LLMs’ ability to interpret action verbs and their associated concepts.</abstract>
    <identifier type="citekey">ravelli-etal-2024-macid</identifier>
    <location>
      <url>https://aclanthology.org/2024.clicit-1.137/</url>
    </location>
    <part>
      <date>2024-12</date>
      <extent unit="page">
        <start>1234</start>
        <end>1238</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T MACID - Multimodal ACtion IDentification: A CALAMITA Challenge
%A Ravelli, Andrea Amelio
%A Varvara, Rossella
%A Gregori, Lorenzo
%Y Dell’Orletta, Felice
%Y Lenci, Alessandro
%Y Montemagni, Simonetta
%Y Sprugnoli, Rachele
%S Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)
%D 2024
%8 December
%I CEUR Workshop Proceedings
%C Pisa, Italy
%@ 979-12-210-7060-6
%F ravelli-etal-2024-macid
%X This paper presents the Multimodal ACtion IDentification challenge (MACID), part of the first CALAMITA competition. The objective of this task is to evaluate the ability of large language models (LLMs) to differentiate between closely related action concepts based on textual descriptions alone. The challenge is inspired by the “find the intruder” task, where models must identify an outlier among a set of 4 sentences that describe similar yet distinct actions. The dataset highlights action-predicate mismatches, where the same verb may describe different actions or different verbs may refer to the same action. Although currently mono-modal (text-only), the task is designed for future multimodal integration, linking visual and textual representations to enhance action recognition. By probing a model’s capacity to resolve subtle linguistic ambiguities, the challenge underscores the need for deeper cognitive understanding in action-language alignment, ultimately testing the boundaries of LLMs’ ability to interpret action verbs and their associated concepts.
%U https://aclanthology.org/2024.clicit-1.137/
%P 1234-1238
Markdown (Informal)
[MACID - Multimodal ACtion IDentification: A CALAMITA Challenge](https://aclanthology.org/2024.clicit-1.137/) (Ravelli et al., CLiC-it 2024)
ACL