@inproceedings{attanasio-etal-2024-calamita,
title = "{CALAMITA}: Challenge the Abilities of {LA}nguage Models in {ITA}lian",
author = "Attanasio, Giuseppe and
Basile, Pierpaolo and
Borazio, Federico and
Croce, Danilo and
Francis, Maria and
Gili, Jacopo and
Musacchio, Elio and
Nissim, Malvina and
Patti, Viviana and
Rinaldi, Matteo and
Scalena, Daniel",
editor = "Dell'Orletta, Felice and
Lenci, Alessandro and
Montemagni, Simonetta and
Sprugnoli, Rachele",
booktitle = "Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)",
month = dec,
year = "2024",
address = "Pisa, Italy",
publisher = "CEUR Workshop Proceedings",
url = "https://aclanthology.org/2024.clicit-1.116/",
pages = "1054--1063",
ISBN = "979-12-210-7060-6",
abstract = "The rapid development of Large Language Models (LLMs) has called for robust benchmarks to assess their abilities, track progress, and compare iterations. While existing benchmarks provide extensive evaluations across diverse tasks, they predominantly focus on English, leaving other languages underserved. For Italian, the EVALITA campaigns have provided a long-standing tradition of classification-focused shared tasks. However, their scope does not fully align with the nuanced evaluation required for modern LLMs. To address this gap, we introduce {\textquotedblleft}Challenge the Abilities of LAnguage Models in ITAlian{\textquotedblright} (CALAMITA), a collaborative effort to create a dynamic and growing benchmark tailored to Italian. CALAMITA emphasizes diversity in task design to test a wide range of LLM capabilities through resources natively developed in Italian by the community. This initiative includes a shared platform, live leaderboard, and centralized evaluation framework. This paper outlines the collaborative process, initial challenges, and evaluation framework of CALAMITA."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="attanasio-etal-2024-calamita">
<titleInfo>
<title>CALAMITA: Challenge the Abilities of LAnguage Models in ITAlian</title>
</titleInfo>
<name type="personal">
<namePart type="given">Giuseppe</namePart>
<namePart type="family">Attanasio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pierpaolo</namePart>
<namePart type="family">Basile</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Federico</namePart>
<namePart type="family">Borazio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Danilo</namePart>
<namePart type="family">Croce</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Francis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jacopo</namePart>
<namePart type="family">Gili</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elio</namePart>
<namePart type="family">Musacchio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malvina</namePart>
<namePart type="family">Nissim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviana</namePart>
<namePart type="family">Patti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matteo</namePart>
<namePart type="family">Rinaldi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Scalena</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Felice</namePart>
<namePart type="family">Dell’Orletta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simonetta</namePart>
<namePart type="family">Montemagni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>CEUR Workshop Proceedings</publisher>
<place>
<placeTerm type="text">Pisa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-12-210-7060-6</identifier>
</relatedItem>
<abstract>The rapid development of Large Language Models (LLMs) has called for robust benchmarks to assess their abilities, track progress, and compare iterations. While existing benchmarks provide extensive evaluations across diverse tasks, they predominantly focus on English, leaving other languages underserved. For Italian, the EVALITA campaigns have provided a long-standing tradition of classification-focused shared tasks. However, their scope does not fully align with the nuanced evaluation required for modern LLMs. To address this gap, we introduce “Challenge the Abilities of LAnguage Models in ITAlian” (CALAMITA), a collaborative effort to create a dynamic and growing benchmark tailored to Italian. CALAMITA emphasizes diversity in task design to test a wide range of LLM capabilities through resources natively developed in Italian by the community. This initiative includes a shared platform, live leaderboard, and centralized evaluation framework. This paper outlines the collaborative process, initial challenges, and evaluation framework of CALAMITA.</abstract>
<identifier type="citekey">attanasio-etal-2024-calamita</identifier>
<location>
<url>https://aclanthology.org/2024.clicit-1.116/</url>
</location>
<part>
<date>2024-12</date>
<extent unit="page">
<start>1054</start>
<end>1063</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CALAMITA: Challenge the Abilities of LAnguage Models in ITAlian
%A Attanasio, Giuseppe
%A Basile, Pierpaolo
%A Borazio, Federico
%A Croce, Danilo
%A Francis, Maria
%A Gili, Jacopo
%A Musacchio, Elio
%A Nissim, Malvina
%A Patti, Viviana
%A Rinaldi, Matteo
%A Scalena, Daniel
%Y Dell’Orletta, Felice
%Y Lenci, Alessandro
%Y Montemagni, Simonetta
%Y Sprugnoli, Rachele
%S Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)
%D 2024
%8 December
%I CEUR Workshop Proceedings
%C Pisa, Italy
%@ 979-12-210-7060-6
%F attanasio-etal-2024-calamita
%X The rapid development of Large Language Models (LLMs) has called for robust benchmarks to assess their abilities, track progress, and compare iterations. While existing benchmarks provide extensive evaluations across diverse tasks, they predominantly focus on English, leaving other languages underserved. For Italian, the EVALITA campaigns have provided a long-standing tradition of classification-focused shared tasks. However, their scope does not fully align with the nuanced evaluation required for modern LLMs. To address this gap, we introduce “Challenge the Abilities of LAnguage Models in ITAlian” (CALAMITA), a collaborative effort to create a dynamic and growing benchmark tailored to Italian. CALAMITA emphasizes diversity in task design to test a wide range of LLM capabilities through resources natively developed in Italian by the community. This initiative includes a shared platform, live leaderboard, and centralized evaluation framework. This paper outlines the collaborative process, initial challenges, and evaluation framework of CALAMITA.
%U https://aclanthology.org/2024.clicit-1.116/
%P 1054-1063
Markdown (Informal)
[CALAMITA: Challenge the Abilities of LAnguage Models in ITAlian](https://aclanthology.org/2024.clicit-1.116/) (Attanasio et al., CLiC-it 2024)
ACL
- Giuseppe Attanasio, Pierpaolo Basile, Federico Borazio, Danilo Croce, Maria Francis, Jacopo Gili, Elio Musacchio, Malvina Nissim, Viviana Patti, Matteo Rinaldi, and Daniel Scalena. 2024. CALAMITA: Challenge the Abilities of LAnguage Models in ITAlian. In Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024), pages 1054–1063, Pisa, Italy. CEUR Workshop Proceedings.