@inproceedings{kovacs-etal-2026-twentys,
title = "Twenty{'}s Plenty: Semantic Scaffolding and Span Architecture for 19-Label {NER} in Medieval {L}atin Charters",
author = "Kov{\'a}cs, Tam{\'a}s and
Consolo, Giuseppe and
Vogeler, Georg",
editor = {Hamilton, Sil and
{\"O}hman, Emily and
Hicke, Rebecca M. M. and
Bizzoni, Yuri and
Bax, Axel and
Matthews, Jacob A. and
H{\"a}m{\"a}l{\"a}inen, Mika},
booktitle = "Proceedings of the 6th International Conference on Natural Language Processing for the Digital Humanities",
month = jul,
year = "2026",
address = "San Diego, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.nlp4dh-1.22/",
pages = "236--241",
ISBN = "979-8-89176-427-9",
abstract = "This study investigates whether a high-quality, 19-label named entity recogniser for medieval Latin charters can be constructed using only a few hundred annotated sentences. The authors introduce ``semantic scaffolding,'' an innovation that utilizes richly descriptive English label phrases as prompts to activate latent multilingual knowledge within the model. This is paired with a custom span-based architecture utilizing XLM-RoBERTa-large, 4-head attention pooling to handle long property descriptions, and a hybrid loss system including Asymmetric Focal-Dice and InfoNCE contrastive terms. Results demonstrate that semantic scaffolding enables fine-tuned GLiNER to reach 80.8{\%} overlap F1, while the custom architecture achieves 83.4{\%} overlap F1 using only 298 training sentences. Significantly, the paper provides an empirical demonstration that domain-specific pre-training on medieval Latin offers no performance advantage once task-specific fine-tuning is applied. While the model excels at frequent categories like PER (95.7{\%} F1) and LOC (93.5{\%} F1), challenges persist for rare, position-dependent legal categories such as LEG (53.1{\%} F1) and TRANS (52.6{\%} F1)."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kovacs-etal-2026-twentys">
<titleInfo>
<title>Twenty’s Plenty: Semantic Scaffolding and Span Architecture for 19-Label NER in Medieval Latin Charters</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tamás</namePart>
<namePart type="family">Kovács</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giuseppe</namePart>
<namePart type="family">Consolo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Georg</namePart>
<namePart type="family">Vogeler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th International Conference on Natural Language Processing for the Digital Humanities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sil</namePart>
<namePart type="family">Hamilton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Öhman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rebecca</namePart>
<namePart type="given">M</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Hicke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuri</namePart>
<namePart type="family">Bizzoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Axel</namePart>
<namePart type="family">Bax</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jacob</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Matthews</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-427-9</identifier>
</relatedItem>
<abstract>This study investigates whether a high-quality, 19-label named entity recogniser for medieval Latin charters can be constructed using only a few hundred annotated sentences. The authors introduce “semantic scaffolding,” an innovation that utilizes richly descriptive English label phrases as prompts to activate latent multilingual knowledge within the model. This is paired with a custom span-based architecture utilizing XLM-RoBERTa-large, 4-head attention pooling to handle long property descriptions, and a hybrid loss system including Asymmetric Focal-Dice and InfoNCE contrastive terms. Results demonstrate that semantic scaffolding enables fine-tuned GLiNER to reach 80.8% overlap F1, while the custom architecture achieves 83.4% overlap F1 using only 298 training sentences. Significantly, the paper provides an empirical demonstration that domain-specific pre-training on medieval Latin offers no performance advantage once task-specific fine-tuning is applied. While the model excels at frequent categories like PER (95.7% F1) and LOC (93.5% F1), challenges persist for rare, position-dependent legal categories such as LEG (53.1% F1) and TRANS (52.6% F1).</abstract>
<identifier type="citekey">kovacs-etal-2026-twentys</identifier>
<location>
<url>https://aclanthology.org/2026.nlp4dh-1.22/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>236</start>
<end>241</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Twenty’s Plenty: Semantic Scaffolding and Span Architecture for 19-Label NER in Medieval Latin Charters
%A Kovács, Tamás
%A Consolo, Giuseppe
%A Vogeler, Georg
%Y Hamilton, Sil
%Y Öhman, Emily
%Y Hicke, Rebecca M. M.
%Y Bizzoni, Yuri
%Y Bax, Axel
%Y Matthews, Jacob A.
%Y Hämäläinen, Mika
%S Proceedings of the 6th International Conference on Natural Language Processing for the Digital Humanities
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, USA
%@ 979-8-89176-427-9
%F kovacs-etal-2026-twentys
%X This study investigates whether a high-quality, 19-label named entity recogniser for medieval Latin charters can be constructed using only a few hundred annotated sentences. The authors introduce “semantic scaffolding,” an innovation that utilizes richly descriptive English label phrases as prompts to activate latent multilingual knowledge within the model. This is paired with a custom span-based architecture utilizing XLM-RoBERTa-large, 4-head attention pooling to handle long property descriptions, and a hybrid loss system including Asymmetric Focal-Dice and InfoNCE contrastive terms. Results demonstrate that semantic scaffolding enables fine-tuned GLiNER to reach 80.8% overlap F1, while the custom architecture achieves 83.4% overlap F1 using only 298 training sentences. Significantly, the paper provides an empirical demonstration that domain-specific pre-training on medieval Latin offers no performance advantage once task-specific fine-tuning is applied. While the model excels at frequent categories like PER (95.7% F1) and LOC (93.5% F1), challenges persist for rare, position-dependent legal categories such as LEG (53.1% F1) and TRANS (52.6% F1).
%U https://aclanthology.org/2026.nlp4dh-1.22/
%P 236-241
Markdown (Informal)
[Twenty’s Plenty: Semantic Scaffolding and Span Architecture for 19-Label NER in Medieval Latin Charters](https://aclanthology.org/2026.nlp4dh-1.22/) (Kovács et al., NLP4DH 2026)
ACL