% Conference paper in the CLiC-it 2024 proceedings (ACL Anthology record; see the url field).
% Case-sensitive words in the title are brace-protected as whole words so that
% sentence-casing bibliography styles keep their capitalization.
@inproceedings{alzetta-etal-2024-similex,
  title     = {{SimilEx}: The First {Italian} Dataset for Sentence Similarity with Natural Language Explanations},
  author    = {Alzetta, Chiara and
               Dell'Orletta, Felice and
               Fazzone, Chiara and
               Venturi, Giulia},
  editor    = {Dell'Orletta, Felice and
               Lenci, Alessandro and
               Montemagni, Simonetta and
               Sprugnoli, Rachele},
  booktitle = {Proceedings of the 10th Italian Conference on Computational Linguistics ({CLiC-it} 2024)},
  month     = dec,
  year      = {2024},
  address   = {Pisa, Italy},
  publisher = {CEUR Workshop Proceedings},
  url       = {https://aclanthology.org/2024.clicit-1.4/},
  pages     = {20--28},
  isbn      = {979-12-210-7060-6},
  abstract  = {Large language models (LLMs) demonstrate great performance in natural language processing and understanding tasks. However, much work remains to enhance their interpretability. Annotated datasets with explanations could be key to addressing this issue, as they enable the development of models that provide human-like explanations for their decisions. In this paper, we introduce the SimilEx dataset, the first Italian dataset reporting human evaluations of similarity between pairs of sentences. For a subset of these pairs, the annotators also provided explanations in natural language for the scores assigned. The SimilEx dataset is valuable for exploring the variability in similarity perception between sentences and for training LLMs to offer human-like explanations for their predictions.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alzetta-etal-2024-similex">
<titleInfo>
<title>SimilEx: The First Italian Dataset for Sentence Similarity with Natural Language Explanations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chiara</namePart>
<namePart type="family">Alzetta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Felice</namePart>
<namePart type="family">Dell’Orletta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chiara</namePart>
<namePart type="family">Fazzone</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giulia</namePart>
<namePart type="family">Venturi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Felice</namePart>
<namePart type="family">Dell’Orletta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simonetta</namePart>
<namePart type="family">Montemagni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>CEUR Workshop Proceedings</publisher>
<place>
<placeTerm type="text">Pisa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-12-210-7060-6</identifier>
</relatedItem>
<abstract>Large language models (LLMs) demonstrate great performance in natural language processing and understanding tasks. However, much work remains to enhance their interpretability. Annotated datasets with explanations could be key to addressing this issue, as they enable the development of models that provide human-like explanations for their decisions. In this paper, we introduce the SimilEx dataset, the first Italian dataset reporting human evaluations of similarity between pairs of sentences. For a subset of these pairs, the annotators also provided explanations in natural language for the scores assigned. The SimilEx dataset is valuable for exploring the variability in similarity perception between sentences and for training LLMs to offer human-like explanations for their predictions.</abstract>
<identifier type="citekey">alzetta-etal-2024-similex</identifier>
<location>
<url>https://aclanthology.org/2024.clicit-1.4/</url>
</location>
<part>
<date>2024-12</date>
<extent unit="page">
<start>20</start>
<end>28</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SimilEx: The First Italian Dataset for Sentence Similarity with Natural Language Explanations
%A Alzetta, Chiara
%A Dell’Orletta, Felice
%A Fazzone, Chiara
%A Venturi, Giulia
%Y Dell’Orletta, Felice
%Y Lenci, Alessandro
%Y Montemagni, Simonetta
%Y Sprugnoli, Rachele
%S Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)
%D 2024
%8 December
%I CEUR Workshop Proceedings
%C Pisa, Italy
%@ 979-12-210-7060-6
%F alzetta-etal-2024-similex
%X Large language models (LLMs) demonstrate great performance in natural language processing and understanding tasks. However, much work remains to enhance their interpretability. Annotated datasets with explanations could be key to addressing this issue, as they enable the development of models that provide human-like explanations for their decisions. In this paper, we introduce the SimilEx dataset, the first Italian dataset reporting human evaluations of similarity between pairs of sentences. For a subset of these pairs, the annotators also provided explanations in natural language for the scores assigned. The SimilEx dataset is valuable for exploring the variability in similarity perception between sentences and for training LLMs to offer human-like explanations for their predictions.
%U https://aclanthology.org/2024.clicit-1.4/
%P 20-28
Markdown (Informal)
[SimilEx: The First Italian Dataset for Sentence Similarity with Natural Language Explanations](https://aclanthology.org/2024.clicit-1.4/) (Alzetta et al., CLiC-it 2024)
ACL