@inproceedings{fantoli-de-lhoneux-2022-linguistic,
title = "Linguistic Annotation of Neo-{L}atin Mathematical Texts: A Pilot-Study to Improve the Automatic Parsing of the Archimedes Latinus",
author = "Fantoli, Margherita and
de Lhoneux, Miryam",
editor = "Sprugnoli, Rachele and
Passarotti, Marco",
booktitle = "Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.lt4hala-1.18",
pages = "129--134",
abstract = "This paper describes the process of syntactically parsing the Latin translation by Jacopo da San Cassiano of the Greek mathematical work The Spirals of Archimedes. The Universal Dependencies formalism is adopted. First, we introduce the historical and linguistic importance of Jacopo da San Cassiano{'}s translation. Subsequently, we describe the deep Biaffine parser used for this pilot study. In particular, we motivate the choice of using the technique of treebank embeddings in light of the characteristics of mathematical texts. The paper then details the process of creation of training and test data, by highlighting the most compelling linguistic features of the text and the choices implemented in the current version of the treebank. Finally, the results of the parsing are discussed in comparison to a baseline and the most prominent errors are discussed. Overall, the paper shows the added value of creating specific training data, and of using targeted strategies (as treebank embeddings) to exploit existing annotated corpora while preserving the features of one specific text when performing syntactic parsing.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fantoli-de-lhoneux-2022-linguistic">
<titleInfo>
<title>Linguistic Annotation of Neo-Latin Mathematical Texts: A Pilot-Study to Improve the Automatic Parsing of the Archimedes Latinus</title>
</titleInfo>
<name type="personal">
<namePart type="given">Margherita</namePart>
<namePart type="family">Fantoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miryam</namePart>
<namePart type="family">de Lhoneux</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Passarotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the process of syntactically parsing the Latin translation by Jacopo da San Cassiano of the Greek mathematical work The Spirals of Archimedes. The Universal Dependencies formalism is adopted. First, we introduce the historical and linguistic importance of Jacopo da San Cassiano’s translation. Subsequently, we describe the deep Biaffine parser used for this pilot study. In particular, we motivate the choice of using the technique of treebank embeddings in light of the characteristics of mathematical texts. The paper then details the process of creation of training and test data, by highlighting the most compelling linguistic features of the text and the choices implemented in the current version of the treebank. Finally, the results of the parsing are discussed in comparison to a baseline and the most prominent errors are discussed. Overall, the paper shows the added value of creating specific training data, and of using targeted strategies (as treebank embeddings) to exploit existing annotated corpora while preserving the features of one specific text when performing syntactic parsing.</abstract>
<identifier type="citekey">fantoli-de-lhoneux-2022-linguistic</identifier>
<location>
<url>https://aclanthology.org/2022.lt4hala-1.18</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>129</start>
<end>134</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Linguistic Annotation of Neo-Latin Mathematical Texts: A Pilot-Study to Improve the Automatic Parsing of the Archimedes Latinus
%A Fantoli, Margherita
%A de Lhoneux, Miryam
%Y Sprugnoli, Rachele
%Y Passarotti, Marco
%S Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F fantoli-de-lhoneux-2022-linguistic
%X This paper describes the process of syntactically parsing the Latin translation by Jacopo da San Cassiano of the Greek mathematical work The Spirals of Archimedes. The Universal Dependencies formalism is adopted. First, we introduce the historical and linguistic importance of Jacopo da San Cassiano’s translation. Subsequently, we describe the deep Biaffine parser used for this pilot study. In particular, we motivate the choice of using the technique of treebank embeddings in light of the characteristics of mathematical texts. The paper then details the process of creation of training and test data, by highlighting the most compelling linguistic features of the text and the choices implemented in the current version of the treebank. Finally, the results of the parsing are discussed in comparison to a baseline and the most prominent errors are discussed. Overall, the paper shows the added value of creating specific training data, and of using targeted strategies (as treebank embeddings) to exploit existing annotated corpora while preserving the features of one specific text when performing syntactic parsing.
%U https://aclanthology.org/2022.lt4hala-1.18
%P 129-134
Markdown (Informal)
[Linguistic Annotation of Neo-Latin Mathematical Texts: A Pilot-Study to Improve the Automatic Parsing of the Archimedes Latinus](https://aclanthology.org/2022.lt4hala-1.18) (Fantoli & de Lhoneux, LT4HALA 2022)
ACL