@inproceedings{hudspeth-etal-2024-latin,
title = "{L}atin Treebanks in Review: An Evaluation of Morphological Tagging Across Time",
author = "Hudspeth, Marisa and
O{'}Connor, Brendan and
Thompson, Laure",
editor = "Pavlopoulos, John and
Sommerschield, Thea and
Assael, Yannis and
Gordin, Shai and
Cho, Kyunghyun and
Passarotti, Marco and
Sprugnoli, Rachele and
Liu, Yudong and
Li, Bin and
Anderson, Adam",
booktitle = "Proceedings of the 1st Workshop on Machine Learning for Ancient Languages (ML4AL 2024)",
month = aug,
year = "2024",
address = "Hybrid in Bangkok, Thailand and online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.ml4al-1.21",
pages = "203--218",
abstract = "Existing Latin treebanks draw from Latin{'}s long written tradition, spanning 17 centuries and a variety of cultures. Recent efforts have begun to harmonize these treebanks{'} annotations to better train and evaluate morphological taggers. However, the heterogeneity of these treebanks must be carefully considered to build effective and reliable data. In this work, we review existing Latin treebanks to identify the texts they draw from, identify their overlap, and document their coverage across time and genre. We additionally design automated conversions of their morphological feature annotations into the conventions of standard Latin grammar. From this, we build new time-period data splits that draw from the existing treebanks which we use to perform a broad cross-time analysis for POS and morphological feature tagging. We find that BERT-based taggers outperform existing taggers while also being more robust to cross-domain shifts.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hudspeth-etal-2024-latin">
<titleInfo>
<title>Latin Treebanks in Review: An Evaluation of Morphological Tagging Across Time</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marisa</namePart>
<namePart type="family">Hudspeth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Brendan</namePart>
<namePart type="family">O’Connor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laure</namePart>
<namePart type="family">Thompson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Machine Learning for Ancient Languages (ML4AL 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Pavlopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thea</namePart>
<namePart type="family">Sommerschield</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yannis</namePart>
<namePart type="family">Assael</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shai</namePart>
<namePart type="family">Gordin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyunghyun</namePart>
<namePart type="family">Cho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Passarotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yudong</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bin</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Anderson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hybrid in Bangkok, Thailand and online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Existing Latin treebanks draw from Latin’s long written tradition, spanning 17 centuries and a variety of cultures. Recent efforts have begun to harmonize these treebanks’ annotations to better train and evaluate morphological taggers. However, the heterogeneity of these treebanks must be carefully considered to build effective and reliable data. In this work, we review existing Latin treebanks to identify the texts they draw from, identify their overlap, and document their coverage across time and genre. We additionally design automated conversions of their morphological feature annotations into the conventions of standard Latin grammar. From this, we build new time-period data splits that draw from the existing treebanks which we use to perform a broad cross-time analysis for POS and morphological feature tagging. We find that BERT-based taggers outperform existing taggers while also being more robust to cross-domain shifts.</abstract>
<identifier type="citekey">hudspeth-etal-2024-latin</identifier>
<location>
<url>https://aclanthology.org/2024.ml4al-1.21</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>203</start>
<end>218</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Latin Treebanks in Review: An Evaluation of Morphological Tagging Across Time
%A Hudspeth, Marisa
%A O’Connor, Brendan
%A Thompson, Laure
%Y Pavlopoulos, John
%Y Sommerschield, Thea
%Y Assael, Yannis
%Y Gordin, Shai
%Y Cho, Kyunghyun
%Y Passarotti, Marco
%Y Sprugnoli, Rachele
%Y Liu, Yudong
%Y Li, Bin
%Y Anderson, Adam
%S Proceedings of the 1st Workshop on Machine Learning for Ancient Languages (ML4AL 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Hybrid in Bangkok, Thailand and online
%F hudspeth-etal-2024-latin
%X Existing Latin treebanks draw from Latin’s long written tradition, spanning 17 centuries and a variety of cultures. Recent efforts have begun to harmonize these treebanks’ annotations to better train and evaluate morphological taggers. However, the heterogeneity of these treebanks must be carefully considered to build effective and reliable data. In this work, we review existing Latin treebanks to identify the texts they draw from, identify their overlap, and document their coverage across time and genre. We additionally design automated conversions of their morphological feature annotations into the conventions of standard Latin grammar. From this, we build new time-period data splits that draw from the existing treebanks which we use to perform a broad cross-time analysis for POS and morphological feature tagging. We find that BERT-based taggers outperform existing taggers while also being more robust to cross-domain shifts.
%U https://aclanthology.org/2024.ml4al-1.21
%P 203-218
Markdown (Informal)
[Latin Treebanks in Review: An Evaluation of Morphological Tagging Across Time](https://aclanthology.org/2024.ml4al-1.21) (Hudspeth et al., ML4AL-WS 2024)
ACL