% BibTeX entry for a paper in the LAW-XIX 2025 workshop proceedings (ACL Anthology ID 2025.law-1.8).
@inproceedings{passarotti-etal-2025-harmonizing,
    title     = {Harmonizing Divergent Lemmatization and Part-of-Speech Tagging Practices for {Latin} Participles through the {LiLa} Knowledge Base},
    author    = {Passarotti, Marco and
                 Iurescia, Federica and
                 Ruffolo, Paolo},
    editor    = {Peng, Siyao and
                 Rehbein, Ines},
    booktitle = {Proceedings of the 19th Linguistic Annotation Workshop (LAW-XIX-2025)},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.law-1.8/},
    doi       = {10.18653/v1/2025.law-1.8},
    pages     = {103--114},
    isbn      = {979-8-89176-262-6},
    abstract  = {This paper addresses the challenge of divergent lemmatization and part-of-speech (PoS) tagging practices for Latin participles in annotated corpora. We propose a solution through the LiLa Knowledge Base, a Linked Open Data framework designed to unify lexical and textual data for Latin. Using lemmas as the point of connection between distributed textual and lexical resources, LiLa introduces hypolemmas {---} secondary citation forms belonging to a word{'}s inflectional paradigm {---} as a means of reconciling divergent annotations for participles. Rather than advocating a single uniform annotation scheme, LiLa preserves each resource{'}s native guidelines while ensuring that users can retrieve and analyze participial data seamlessly. Via empirical assessments of multiple Latin corpora, we show how the LiLa{'}s integration of lemmas and hypolemmas enables consistent retrieval of participle forms regardless of whether they are categorized as verbal or adjectival.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey passarotti-etal-2025-harmonizing: article-level metadata, with the proceedings volume described in the host relatedItem. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="passarotti-etal-2025-harmonizing">
    <titleInfo>
      <title>Harmonizing Divergent Lemmatization and Part-of-Speech Tagging Practices for Latin Participles through the LiLa Knowledge Base</title>
    </titleInfo>
    <!-- Authors, in the order given -->
    <name type="personal">
      <namePart type="given">Marco</namePart>
      <namePart type="family">Passarotti</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Federica</namePart>
      <namePart type="family">Iurescia</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Paolo</namePart>
      <namePart type="family">Ruffolo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 19th Linguistic Annotation Workshop (LAW-XIX-2025)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Siyao</namePart>
        <namePart type="family">Peng</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ines</namePart>
        <namePart type="family">Rehbein</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-262-6</identifier>
    </relatedItem>
    <abstract>This paper addresses the challenge of divergent lemmatization and part-of-speech (PoS) tagging practices for Latin participles in annotated corpora. We propose a solution through the LiLa Knowledge Base, a Linked Open Data framework designed to unify lexical and textual data for Latin. Using lemmas as the point of connection between distributed textual and lexical resources, LiLa introduces hypolemmas — secondary citation forms belonging to a word’s inflectional paradigm — as a means of reconciling divergent annotations for participles. Rather than advocating a single uniform annotation scheme, LiLa preserves each resource’s native guidelines while ensuring that users can retrieve and analyze participial data seamlessly. Via empirical assessments of multiple Latin corpora, we show how the LiLa’s integration of lemmas and hypolemmas enables consistent retrieval of participle forms regardless of whether they are categorized as verbal or adjectival.</abstract>
    <identifier type="citekey">passarotti-etal-2025-harmonizing</identifier>
    <identifier type="doi">10.18653/v1/2025.law-1.8</identifier>
    <location>
      <url>https://aclanthology.org/2025.law-1.8/</url>
    </location>
    <!-- Position of the article within the host volume -->
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>103</start>
        <end>114</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Harmonizing Divergent Lemmatization and Part-of-Speech Tagging Practices for Latin Participles through the LiLa Knowledge Base
%A Passarotti, Marco
%A Iurescia, Federica
%A Ruffolo, Paolo
%Y Peng, Siyao
%Y Rehbein, Ines
%S Proceedings of the 19th Linguistic Annotation Workshop (LAW-XIX-2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-262-6
%F passarotti-etal-2025-harmonizing
%X This paper addresses the challenge of divergent lemmatization and part-of-speech (PoS) tagging practices for Latin participles in annotated corpora. We propose a solution through the LiLa Knowledge Base, a Linked Open Data framework designed to unify lexical and textual data for Latin. Using lemmas as the point of connection between distributed textual and lexical resources, LiLa introduces hypolemmas — secondary citation forms belonging to a word’s inflectional paradigm — as a means of reconciling divergent annotations for participles. Rather than advocating a single uniform annotation scheme, LiLa preserves each resource’s native guidelines while ensuring that users can retrieve and analyze participial data seamlessly. Via empirical assessments of multiple Latin corpora, we show how the LiLa’s integration of lemmas and hypolemmas enables consistent retrieval of participle forms regardless of whether they are categorized as verbal or adjectival.
%R 10.18653/v1/2025.law-1.8
%U https://aclanthology.org/2025.law-1.8/
%U https://doi.org/10.18653/v1/2025.law-1.8
%P 103-114
Markdown (Informal)
[Harmonizing Divergent Lemmatization and Part-of-Speech Tagging Practices for Latin Participles through the LiLa Knowledge Base](https://aclanthology.org/2025.law-1.8/) (Passarotti et al., LAW 2025)
ACL