@inproceedings{corbetta-etal-2024-join,
title = "Join Together? Combining Data to Parse {I}talian Texts",
author = "Corbetta, Claudia and
Moretti, Giovanni and
Passarotti, Marco",
editor = "Dell'Orletta, Felice and
Lenci, Alessandro and
Montemagni, Simonetta and
Sprugnoli, Rachele",
booktitle = "Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)",
month = dec,
year = "2024",
address = "Pisa, Italy",
publisher = "CEUR Workshop Proceedings",
url = "https://aclanthology.org/2024.clicit-1.30/",
pages = "251--257",
ISBN = "979-12-210-7060-6",
abstract = "In this paper, we create and evaluate non-combined and combined models using Old and Contemporary Italian data to determine whether increasing the size of the training data with a combined model could improve parsing accuracy to facilitate manual annotation. We find that, despite the increased size of the training data, in-domain parsing performs better. Additionally, we discover that models trained on Old Italian data perform better on Contemporary Italian data than the reverse. We attempt to explain this result in terms of syntactic complexity, finding that Old Italian text exhibits higher sentence length and non-projectivity rate."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="corbetta-etal-2024-join">
<titleInfo>
<title>Join Together? Combining Data to Parse Italian Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Corbetta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giovanni</namePart>
<namePart type="family">Moretti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Passarotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Felice</namePart>
<namePart type="family">Dell’Orletta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simonetta</namePart>
<namePart type="family">Montemagni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>CEUR Workshop Proceedings</publisher>
<place>
<placeTerm type="text">Pisa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-12-210-7060-6</identifier>
</relatedItem>
<abstract>In this paper, we create and evaluate non-combined and combined models using Old and Contemporary Italian data to determine whether increasing the size of the training data with a combined model could improve parsing accuracy to facilitate manual annotation. We find that, despite the increased size of the training data, in-domain parsing performs better. Additionally, we discover that models trained on Old Italian data perform better on Contemporary Italian data than the reverse. We attempt to explain this result in terms of syntactic complexity, finding that Old Italian text exhibits higher sentence length and non-projectivity rate.</abstract>
<identifier type="citekey">corbetta-etal-2024-join</identifier>
<location>
<url>https://aclanthology.org/2024.clicit-1.30/</url>
</location>
<part>
<date>2024-12</date>
<extent unit="page">
<start>251</start>
<end>257</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Join Together? Combining Data to Parse Italian Texts
%A Corbetta, Claudia
%A Moretti, Giovanni
%A Passarotti, Marco
%Y Dell’Orletta, Felice
%Y Lenci, Alessandro
%Y Montemagni, Simonetta
%Y Sprugnoli, Rachele
%S Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024)
%D 2024
%8 December
%I CEUR Workshop Proceedings
%C Pisa, Italy
%@ 979-12-210-7060-6
%F corbetta-etal-2024-join
%X In this paper, we create and evaluate non-combined and combined models using Old and Contemporary Italian data to determine whether increasing the size of the training data with a combined model could improve parsing accuracy to facilitate manual annotation. We find that, despite the increased size of the training data, in-domain parsing performs better. Additionally, we discover that models trained on Old Italian data perform better on Contemporary Italian data than the reverse. We attempt to explain this result in terms of syntactic complexity, finding that Old Italian text exhibits higher sentence length and non-projectivity rate.
%U https://aclanthology.org/2024.clicit-1.30/
%P 251-257
Markdown (Informal)
[Join Together? Combining Data to Parse Italian Texts](https://aclanthology.org/2024.clicit-1.30/) (Corbetta et al., CLiC-it 2024)
ACL
- Claudia Corbetta, Giovanni Moretti, and Marco Passarotti. 2024. Join Together? Combining Data to Parse Italian Texts. In Proceedings of the 10th Italian Conference on Computational Linguistics (CLiC-it 2024), pages 251–257, Pisa, Italy. CEUR Workshop Proceedings.