% Conference paper from the CLiC-it 2024 proceedings (record 2024.clicit-1.97).
% Case-sensitive words in title/booktitle are brace-protected as whole words so
% that sentence-casing bibliography styles do not lowercase them.
@inproceedings{scaiella-etal-2024-leveraging,
  title     = {Leveraging Large Language Models for Fact Verification in {Italian}},
  author    = {Scaiella, Antonio and
               Costanzo, Stefano and
               Passone, Elisa and
               Croce, Danilo and
               Gambosi, Giorgio},
  editor    = {Dell'Orletta, Felice and
               Lenci, Alessandro and
               Montemagni, Simonetta and
               Sprugnoli, Rachele},
  booktitle = {Proceedings of the Tenth {Italian} Conference on Computational Linguistics ({CLiC-it} 2024)},
  month     = dec,
  year      = {2024},
  address   = {Pisa, Italy},
  publisher = {CEUR Workshop Proceedings},
  url       = {https://aclanthology.org/2024.clicit-1.97/},
  pages     = {898--908},
  isbn      = {979-12-210-7060-6},
  abstract  = {In recent years, Automatic Fact Checking has become a crucial tool in combating fake news, leveraging AI to verify the accuracy of information. Despite significant advancements, most datasets and models are predominantly available in English, posing challenges for other languages. This paper presents an Italian resource based on the dataset made available in the FEVER evaluation campaign, created to train and evaluate fact-checking models in Italian. The dataset comprises approximately 240k examples, with over 2k test examples manually validated. Additionally, we fine-tuned a state-of-the-art LLM, namely LLaMA3, on both the original English and translated Italian datasets, demonstrating that fine-tuning significantly improves model performance. Our results suggest that the fine-tuned models achieve comparable accuracy in both languages, highlighting the value of the proposed resource.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the paper; the host proceedings volume is described in relatedItem[@type="host"]. -->
  <mods ID="scaiella-etal-2024-leveraging">
    <titleInfo>
      <title>Leveraging Large Language Models for Fact Verification in Italian</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Antonio</namePart>
      <namePart type="family">Scaiella</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Stefano</namePart>
      <namePart type="family">Costanzo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Elisa</namePart>
      <namePart type="family">Passone</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Danilo</namePart>
      <namePart type="family">Croce</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Giorgio</namePart>
      <namePart type="family">Gambosi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Tenth Italian Conference on Computational Linguistics (CLiC-it 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Felice</namePart>
        <namePart type="family">Dell’Orletta</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Simonetta</namePart>
        <namePart type="family">Montemagni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Rachele</namePart>
        <namePart type="family">Sprugnoli</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>CEUR Workshop Proceedings</publisher>
        <place>
          <placeTerm type="text">Pisa, Italy</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-12-210-7060-6</identifier>
    </relatedItem>
    <abstract>In recent years, Automatic Fact Checking has become a crucial tool in combating fake news, leveraging AI to verify the accuracy of information. Despite significant advancements, most datasets and models are predominantly available in English, posing challenges for other languages. This paper presents an Italian resource based on the dataset made available in the FEVER evaluation campaign, created to train and evaluate fact-checking models in Italian. The dataset comprises approximately 240k examples, with over 2k test examples manually validated. Additionally, we fine-tuned a state-of-the-art LLM, namely LLaMA3, on both the original English and translated Italian datasets, demonstrating that fine-tuning significantly improves model performance. Our results suggest that the fine-tuned models achieve comparable accuracy in both languages, highlighting the value of the proposed resource.</abstract>
    <identifier type="citekey">scaiella-etal-2024-leveraging</identifier>
    <location>
      <url>https://aclanthology.org/2024.clicit-1.97/</url>
    </location>
    <!-- Position of the paper within the host volume -->
    <part>
      <date>2024-12</date>
      <extent unit="page">
        <start>898</start>
        <end>908</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Leveraging Large Language Models for Fact Verification in Italian
%A Scaiella, Antonio
%A Costanzo, Stefano
%A Passone, Elisa
%A Croce, Danilo
%A Gambosi, Giorgio
%Y Dell’Orletta, Felice
%Y Lenci, Alessandro
%Y Montemagni, Simonetta
%Y Sprugnoli, Rachele
%S Proceedings of the Tenth Italian Conference on Computational Linguistics (CLiC-it 2024)
%D 2024
%8 December
%I CEUR Workshop Proceedings
%C Pisa, Italy
%@ 979-12-210-7060-6
%F scaiella-etal-2024-leveraging
%X In recent years, Automatic Fact Checking has become a crucial tool in combating fake news, leveraging AI to verify the accuracy of information. Despite significant advancements, most datasets and models are predominantly available in English, posing challenges for other languages. This paper presents an Italian resource based on the dataset made available in the FEVER evaluation campaign, created to train and evaluate fact-checking models in Italian. The dataset comprises approximately 240k examples, with over 2k test examples manually validated. Additionally, we fine-tuned a state-of-the-art LLM, namely LLaMA3, on both the original English and translated Italian datasets, demonstrating that fine-tuning significantly improves model performance. Our results suggest that the fine-tuned models achieve comparable accuracy in both languages, highlighting the value of the proposed resource.
%U https://aclanthology.org/2024.clicit-1.97/
%P 898-908
Markdown (Informal)
[Leveraging Large Language Models for Fact Verification in Italian](https://aclanthology.org/2024.clicit-1.97/) (Scaiella et al., CLiC-it 2024)
ACL