% Conference paper record for the CLiC-it 2024 proceedings.
% The title uses a plain apostrophe in "L'hai": a backtick would be typeset by LaTeX as an opening quote.
@inproceedings{hromei-etal-2024-la,
    title     = {La Non Canonica L'hai Studiata? Exploring {LLM}s and Sentence Canonicity in {Italian}},
    author    = {Hromei, Claudiu and
                 Croce, Danilo and
                 Delmonte, Rodolfo and
                 Basili, Roberto},
    editor    = {Dell'Orletta, Felice and
                 Lenci, Alessandro and
                 Montemagni, Simonetta and
                 Sprugnoli, Rachele},
    booktitle = {Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)},
    month     = dec,
    year      = {2024},
    address   = {Pisa, Italy},
    publisher = {CEUR Workshop Proceedings},
    url       = {https://aclanthology.org/2024.clicit-1.52/},
    pages     = {431--439},
    isbn      = {979-12-210-7060-6},
    abstract  = {This paper investigates the ability of Large Language Models (LLMs) to differentiate between canonical and non-canonical sentences in Italian, employing advanced neural architectures like LLaMA and its adaptations. Canonical sentences adhere to the standard Subject-Verb-Object (SVO) structure. We hypothesize that recent generative LLMs are influenced heavily by the English language, where non-canonical structures are very rare. Using the in-context learning technique, we probe these models and further fine-tune them for this specific task. Initial results indicate that these models continue to struggle with this task even after fine-tuning. Additionally, we introduce a new dataset comprising several hundred sentences from the poetry domain, which presents significant challenges for the canonical structure task.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hromei-etal-2024-la">
<titleInfo>
<title>La Non Canonica L’hai Studiata? Exploring LLMs and Sentence Canonicity in Italian</title>
</titleInfo>
<name type="personal">
<namePart type="given">Claudiu</namePart>
<namePart type="family">Hromei</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Danilo</namePart>
<namePart type="family">Croce</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rodolfo</namePart>
<namePart type="family">Delmonte</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roberto</namePart>
<namePart type="family">Basili</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Felice</namePart>
<namePart type="family">Dell’Orletta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simonetta</namePart>
<namePart type="family">Montemagni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>CEUR Workshop Proceedings</publisher>
<place>
<placeTerm type="text">Pisa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-12-210-7060-6</identifier>
</relatedItem>
<abstract>This paper investigates the ability of Large Language Models (LLMs) to differentiate between canonical and non-canonical sentences in Italian, employing advanced neural architectures like LLaMA and its adaptations. Canonical sentences adhere to the standard Subject-Verb-Object (SVO) structure. We hypothesize that recent generative LLMs are influenced heavily by the English language, where non-canonical structures are very rare. Using the in-context learning technique, we probe these models and further fine-tune them for this specific task. Initial results indicate that these models continue to struggle with this task even after fine-tuning. Additionally, we introduce a new dataset comprising several hundred sentences from the poetry domain, which presents significant challenges for the canonical structure task.</abstract>
<identifier type="citekey">hromei-etal-2024-la</identifier>
<location>
<url>https://aclanthology.org/2024.clicit-1.52/</url>
</location>
<part>
<date>2024-12</date>
<extent unit="page">
<start>431</start>
<end>439</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T La Non Canonica L’hai Studiata? Exploring LLMs and Sentence Canonicity in Italian
%A Hromei, Claudiu
%A Croce, Danilo
%A Delmonte, Rodolfo
%A Basili, Roberto
%Y Dell’Orletta, Felice
%Y Lenci, Alessandro
%Y Montemagni, Simonetta
%Y Sprugnoli, Rachele
%S Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)
%D 2024
%8 December
%I CEUR Workshop Proceedings
%C Pisa, Italy
%@ 979-12-210-7060-6
%F hromei-etal-2024-la
%X This paper investigates the ability of Large Language Models (LLMs) to differentiate between canonical and non-canonical sentences in Italian, employing advanced neural architectures like LLaMA and its adaptations. Canonical sentences adhere to the standard Subject-Verb-Object (SVO) structure. We hypothesize that recent generative LLMs are influenced heavily by the English language, where non-canonical structures are very rare. Using the in-context learning technique, we probe these models and further fine-tune them for this specific task. Initial results indicate that these models continue to struggle with this task even after fine-tuning. Additionally, we introduce a new dataset comprising several hundred sentences from the poetry domain, which presents significant challenges for the canonical structure task.
%U https://aclanthology.org/2024.clicit-1.52/
%P 431-439
Markdown (Informal)
[La Non Canonica L’hai Studiata? Exploring LLMs and Sentence Canonicity in Italian](https://aclanthology.org/2024.clicit-1.52/) (Hromei et al., CLiC-it 2024)
ACL