@inproceedings{fioravanti-etal-2024-automatic,
title = "Automatic Error Detection: Comparing {AI} vs. Human Performance on {L}2 {I}talian Texts",
author = "Fioravanti, Irene and
Forti, Luciana and
Spina, Stefania",
editor = "Dell'Orletta, Felice and
Lenci, Alessandro and
Montemagni, Simonetta and
Sprugnoli, Rachele",
booktitle = "Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)",
month = dec,
year = "2024",
address = "Pisa, Italy",
publisher = "CEUR Workshop Proceedings",
url = "https://aclanthology.org/2024.clicit-1.44/",
pages = "366--372",
ISBN = "979-12-210-7060-6",
abstract = "This paper reports on a study aimed at comparing AI vs. human performance in detecting and categorising errors in L2 Italian texts. Four LLMs were considered: ChatGPT, Copilot, Gemini and Llama3. Two groups of human annotators were involved: L1 and L2 speakers of Italian. A gold standard set of annotations was developed. A fine-grained annotation scheme was adopted, to reflect the specific traits of Italian morphosyntax, with related potential learner errors. Overall, we found that human annotation outperforms AI, with some degree of variation with respect tospecific error types. An increased attention to languages other than English in NLP may significantly improve AI performance in this pivotal task for the many domains of language-related disciplines."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fioravanti-etal-2024-automatic">
<titleInfo>
<title>Automatic Error Detection: Comparing AI vs. Human Performance on L2 Italian Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Irene</namePart>
<namePart type="family">Fioravanti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luciana</namePart>
<namePart type="family">Forti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefania</namePart>
<namePart type="family">Spina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Felice</namePart>
<namePart type="family">Dell’Orletta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simonetta</namePart>
<namePart type="family">Montemagni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>CEUR Workshop Proceedings</publisher>
<place>
<placeTerm type="text">Pisa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-12-210-7060-6</identifier>
</relatedItem>
<abstract>This paper reports on a study aimed at comparing AI vs. human performance in detecting and categorising errors in L2 Italian texts. Four LLMs were considered: ChatGPT, Copilot, Gemini and Llama3. Two groups of human annotators were involved: L1 and L2 speakers of Italian. A gold standard set of annotations was developed. A fine-grained annotation scheme was adopted to reflect the specific traits of Italian morphosyntax, with related potential learner errors. Overall, we found that human annotation outperforms AI, with some degree of variation with respect to specific error types. Increased attention to languages other than English in NLP may significantly improve AI performance in this pivotal task for the many domains of language-related disciplines.</abstract>
<identifier type="citekey">fioravanti-etal-2024-automatic</identifier>
<location>
<url>https://aclanthology.org/2024.clicit-1.44/</url>
</location>
<part>
<date>2024-12</date>
<extent unit="page">
<start>366</start>
<end>372</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Error Detection: Comparing AI vs. Human Performance on L2 Italian Texts
%A Fioravanti, Irene
%A Forti, Luciana
%A Spina, Stefania
%Y Dell’Orletta, Felice
%Y Lenci, Alessandro
%Y Montemagni, Simonetta
%Y Sprugnoli, Rachele
%S Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)
%D 2024
%8 December
%I CEUR Workshop Proceedings
%C Pisa, Italy
%@ 979-12-210-7060-6
%F fioravanti-etal-2024-automatic
%X This paper reports on a study aimed at comparing AI vs. human performance in detecting and categorising errors in L2 Italian texts. Four LLMs were considered: ChatGPT, Copilot, Gemini and Llama3. Two groups of human annotators were involved: L1 and L2 speakers of Italian. A gold standard set of annotations was developed. A fine-grained annotation scheme was adopted to reflect the specific traits of Italian morphosyntax, with related potential learner errors. Overall, we found that human annotation outperforms AI, with some degree of variation with respect to specific error types. Increased attention to languages other than English in NLP may significantly improve AI performance in this pivotal task for the many domains of language-related disciplines.
%U https://aclanthology.org/2024.clicit-1.44/
%P 366-372
Markdown (Informal)
[Automatic Error Detection: Comparing AI vs. Human Performance on L2 Italian Texts](https://aclanthology.org/2024.clicit-1.44/) (Fioravanti et al., CLiC-it 2024)
ACL
Irene Fioravanti, Luciana Forti, and Stefania Spina. 2024. [Automatic Error Detection: Comparing AI vs. Human Performance on L2 Italian Texts](https://aclanthology.org/2024.clicit-1.44/). In *Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)*, pages 366–372, Pisa, Italy. CEUR Workshop Proceedings.