@inproceedings{mercelis-keersmaekers-2022-electra,
title = "An {ELECTRA} Model for {L}atin Token Tagging Tasks",
author = "Mercelis, Wouter and
Keersmaekers, Alek",
editor = "Sprugnoli, Rachele and
Passarotti, Marco",
booktitle = "Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.lt4hala-1.30",
pages = "189--192",
abstract = "This report describes the KU Leuven / Brepols-CTLO submission to EvaLatin 2022. We present the results of our current small Latin ELECTRA model, which will be expanded to a larger model in the future. For the lemmatization task, we combine a neural token-tagging approach with the in-house rule-based lemma lists from Brepols{'} ReFlex software. The results are decent, but suffer from inconsistencies between Brepols{'} and EvaLatin{'}s definitions of a lemma. For POS-tagging, the results come up just short from the first place in this competition, mainly struggling with proper nouns. For morphological tagging, there is much more room for improvement. Here, the constraints added to our Multiclass Multilabel model were often not tight enough, causing missing morphological features. We will further investigate why the combination of the different morphological features, which perform fine on their own, leads to issues.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mercelis-keersmaekers-2022-electra">
<titleInfo>
<title>An ELECTRA Model for Latin Token Tagging Tasks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wouter</namePart>
<namePart type="family">Mercelis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alek</namePart>
<namePart type="family">Keersmaekers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rachele</namePart>
<namePart type="family">Sprugnoli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Passarotti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This report describes the KU Leuven / Brepols-CTLO submission to EvaLatin 2022. We present the results of our current small Latin ELECTRA model, which will be expanded to a larger model in the future. For the lemmatization task, we combine a neural token-tagging approach with the in-house rule-based lemma lists from Brepols’ ReFlex software. The results are decent, but suffer from inconsistencies between Brepols’ and EvaLatin’s definitions of a lemma. For POS-tagging, the results come up just short from the first place in this competition, mainly struggling with proper nouns. For morphological tagging, there is much more room for improvement. Here, the constraints added to our Multiclass Multilabel model were often not tight enough, causing missing morphological features. We will further investigate why the combination of the different morphological features, which perform fine on their own, leads to issues.</abstract>
<identifier type="citekey">mercelis-keersmaekers-2022-electra</identifier>
<location>
<url>https://aclanthology.org/2022.lt4hala-1.30</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>189</start>
<end>192</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An ELECTRA Model for Latin Token Tagging Tasks
%A Mercelis, Wouter
%A Keersmaekers, Alek
%Y Sprugnoli, Rachele
%Y Passarotti, Marco
%S Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F mercelis-keersmaekers-2022-electra
%X This report describes the KU Leuven / Brepols-CTLO submission to EvaLatin 2022. We present the results of our current small Latin ELECTRA model, which will be expanded to a larger model in the future. For the lemmatization task, we combine a neural token-tagging approach with the in-house rule-based lemma lists from Brepols’ ReFlex software. The results are decent, but suffer from inconsistencies between Brepols’ and EvaLatin’s definitions of a lemma. For POS-tagging, the results come up just short from the first place in this competition, mainly struggling with proper nouns. For morphological tagging, there is much more room for improvement. Here, the constraints added to our Multiclass Multilabel model were often not tight enough, causing missing morphological features. We will further investigate why the combination of the different morphological features, which perform fine on their own, leads to issues.
%U https://aclanthology.org/2022.lt4hala-1.30
%P 189-192
Markdown (Informal)
[An ELECTRA Model for Latin Token Tagging Tasks](https://aclanthology.org/2022.lt4hala-1.30) (Mercelis & Keersmaekers, LT4HALA 2022)
ACL
- Wouter Mercelis and Alek Keersmaekers. 2022. An ELECTRA Model for Latin Token Tagging Tasks. In Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages, pages 189–192, Marseille, France. European Language Resources Association.