% Conference paper record; the url field points at ACL Anthology item 2024.clicit-1.73.
% Only the proper noun in the title is brace-protected (as a whole word) so that
% sentence-casing styles keep its capital without breaking kerning/hyphenation.
% NOTE(review): address appears to hold the conference venue rather than the
% publisher's city; confirm before using a style that prints it next to the publisher.
@inproceedings{noviello-tamburini-2024-exploring,
  title     = {Exploring Text-Embedding Retrieval Models for the {Italian} Language},
  author    = {Noviello, Yuri and
               Tamburini, Fabio},
  editor    = {Dell'Orletta, Felice and
               Lenci, Alessandro and
               Montemagni, Simonetta and
               Sprugnoli, Rachele},
  booktitle = {Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)},
  month     = dec,
  year      = {2024},
  address   = {Pisa, Italy},
  publisher = {CEUR Workshop Proceedings},
  url       = {https://aclanthology.org/2024.clicit-1.73/},
  pages     = {654--661},
  isbn      = {979-12-210-7060-6},
  abstract  = {Text retrieval systems have become essential in the field of natural language processing (NLP), serving as the backbone for applications such as search engines, document indexing, and information retrieval. With the rise of generative AI, particularly Retrieval-Augmented Generation (RAG) systems, the demand for robust text retrieval models has increased. However, existing large language models (LLMs) and datasets are often insufficiently optimized for Italian, limiting their performance in Italian text retrieval tasks. This paper addresses this gap by proposing both a data collection and specialized models tailored for Italian text retrieval. Through extensive experimentation, we analyze the improvements and limitations in retrieval performance, paving the way for more effective Italian NLP applications.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS collection holding a single record, keyed by the same ID as its citekey identifier. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="noviello-tamburini-2024-exploring">
    <!-- The paper itself: title, authors, issue date. -->
    <titleInfo>
      <title>Exploring Text-Embedding Retrieval Models for the Italian Language</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Yuri</namePart>
      <namePart type="family">Noviello</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Fabio</namePart>
      <namePart type="family">Tamburini</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Felice</namePart>
        <namePart type="family">Dell’Orletta</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Simonetta</namePart>
        <namePart type="family">Montemagni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Rachele</namePart>
        <namePart type="family">Sprugnoli</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>CEUR Workshop Proceedings</publisher>
        <place>
          <placeTerm type="text">Pisa, Italy</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-12-210-7060-6</identifier>
    </relatedItem>
    <abstract>Text retrieval systems have become essential in the field of natural language processing (NLP), serving as the backbone for applications such as search engines, document indexing, and information retrieval. With the rise of generative AI, particularly Retrieval-Augmented Generation (RAG) systems, the demand for robust text retrieval models has increased. However, existing large language models (LLMs) and datasets are often insufficiently optimized for Italian, limiting their performance in Italian text retrieval tasks. This paper addresses this gap by proposing both a data collection and specialized models tailored for Italian text retrieval. Through extensive experimentation, we analyze the improvements and limitations in retrieval performance, paving the way for more effective Italian NLP applications.</abstract>
    <identifier type="citekey">noviello-tamburini-2024-exploring</identifier>
    <location>
      <url>https://aclanthology.org/2024.clicit-1.73/</url>
    </location>
    <!-- Position of the paper inside the host volume. -->
    <part>
      <date>2024-12</date>
      <extent unit="page">
        <start>654</start>
        <end>661</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring Text-Embedding Retrieval Models for the Italian Language
%A Noviello, Yuri
%A Tamburini, Fabio
%Y Dell’Orletta, Felice
%Y Lenci, Alessandro
%Y Montemagni, Simonetta
%Y Sprugnoli, Rachele
%S Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)
%D 2024
%8 December
%I CEUR Workshop Proceedings
%C Pisa, Italy
%@ 979-12-210-7060-6
%F noviello-tamburini-2024-exploring
%X Text retrieval systems have become essential in the field of natural language processing (NLP), serving as the backbone for applications such as search engines, document indexing, and information retrieval. With the rise of generative AI, particularly Retrieval-Augmented Generation (RAG) systems, the demand for robust text retrieval models has increased. However, existing large language models (LLMs) and datasets are often insufficiently optimized for Italian, limiting their performance in Italian text retrieval tasks. This paper addresses this gap by proposing both a data collection and specialized models tailored for Italian text retrieval. Through extensive experimentation, we analyze the improvements and limitations in retrieval performance, paving the way for more effective Italian NLP applications.
%U https://aclanthology.org/2024.clicit-1.73/
%P 654-661
Markdown (Informal)
[Exploring Text-Embedding Retrieval Models for the Italian Language](https://aclanthology.org/2024.clicit-1.73/) (Noviello & Tamburini, CLiC-it 2024)
ACL