@inproceedings{swaelens-etal-2025-lemmatisation,
title = "Lemmatisation {\&} Morphological Analysis of Unedited {G}reek: Do Simple Tasks Need Complex Solutions?",
author = "Swaelens, Colin and
De Vos, Ilse and
Lefever, Els",
editor = "Che, Wanxiang and
Nabende, Joyce and
Shutova, Ekaterina and
Pilehvar, Mohammad Taher",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-acl.399/",
doi = "10.18653/v1/2025.findings-acl.399",
pages = "7681--7689",
ISBN = "979-8-89176-256-5",
abstract = "Fine-tuning transformer-based models for part-of-speech tagging of unedited Greek text has outperformed traditional systems. However, when applied to lemmatisation or morphological analysis, fine-tuning has not yet achieved competitive results. This paper explores various approaches to combine morphological features to both reduce label complexity and enhance multi-task training. Specifically, we group three nominal features into a single label, and combine the three most distinctive features of verbs into another unified label. These combined labels are used to fine-tune DBBERT, a BERT model pre-trained on both ancient and modern Greek. Additionally, we experiment with joint training {--} both among these labels and in combination with POS tagging {--} within a multi-task framework to improve performance by transferring parameters. To evaluate our models, we use a manually annotated gold standard from the Database of Byzantine Book Epigrams. Our results show a nearly 9 pp. improvement, demonstrating that multi-task learning is a promising approach for linguistic annotation in less standardised corpora."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="swaelens-etal-2025-lemmatisation">
<titleInfo>
<title>Lemmatisation & Morphological Analysis of Unedited Greek: Do Simple Tasks Need Complex Solutions?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Colin</namePart>
<namePart type="family">Swaelens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ilse</namePart>
<namePart type="family">De Vos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Els</namePart>
<namePart type="family">Lefever</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Fine-tuning transformer-based models for part-of-speech tagging of unedited Greek text has outperformed traditional systems. However, when applied to lemmatisation or morphological analysis, fine-tuning has not yet achieved competitive results. This paper explores various approaches to combine morphological features to both reduce label complexity and enhance multi-task training. Specifically, we group three nominal features into a single label, and combine the three most distinctive features of verbs into another unified label. These combined labels are used to fine-tune DBBERT, a BERT model pre-trained on both ancient and modern Greek. Additionally, we experiment with joint training – both among these labels and in combination with POS tagging – within a multi-task framework to improve performance by transferring parameters. To evaluate our models, we use a manually annotated gold standard from the Database of Byzantine Book Epigrams. Our results show a nearly 9 pp. improvement, demonstrating that multi-task learning is a promising approach for linguistic annotation in less standardised corpora.</abstract>
<identifier type="citekey">swaelens-etal-2025-lemmatisation</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.399</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.399/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>7681</start>
<end>7689</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Lemmatisation & Morphological Analysis of Unedited Greek: Do Simple Tasks Need Complex Solutions?
%A Swaelens, Colin
%A De Vos, Ilse
%A Lefever, Els
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F swaelens-etal-2025-lemmatisation
%X Fine-tuning transformer-based models for part-of-speech tagging of unedited Greek text has outperformed traditional systems. However, when applied to lemmatisation or morphological analysis, fine-tuning has not yet achieved competitive results. This paper explores various approaches to combine morphological features to both reduce label complexity and enhance multi-task training. Specifically, we group three nominal features into a single label, and combine the three most distinctive features of verbs into another unified label. These combined labels are used to fine-tune DBBERT, a BERT model pre-trained on both ancient and modern Greek. Additionally, we experiment with joint training – both among these labels and in combination with POS tagging – within a multi-task framework to improve performance by transferring parameters. To evaluate our models, we use a manually annotated gold standard from the Database of Byzantine Book Epigrams. Our results show a nearly 9 pp. improvement, demonstrating that multi-task learning is a promising approach for linguistic annotation in less standardised corpora.
%R 10.18653/v1/2025.findings-acl.399
%U https://aclanthology.org/2025.findings-acl.399/
%U https://doi.org/10.18653/v1/2025.findings-acl.399
%P 7681-7689
Markdown (Informal)
[Lemmatisation & Morphological Analysis of Unedited Greek: Do Simple Tasks Need Complex Solutions?](https://aclanthology.org/2025.findings-acl.399/) (Swaelens et al., Findings 2025)
ACL