% Conference-paper entry; its citation key matches the MODS and EndNote records that follow.
@inproceedings{sorbi-etal-2026-weakly-supervised,
  title     = {Weakly Supervised Named Entity Recognition for Historical Texts},
  author    = {Sorbi, Marco and
               Moccozet, Laurent and
               Marchand-Maillet, Stephane},
  editor    = {Alves, Diego and
               Bizzoni, Yuri and
               Degaetano-Ortlieb, Stefania and
               Kazantseva, Anna and
               Pagel, Janis and
               Szpakowicz, Stan},
  booktitle = {Proceedings of the 10th Joint {SIGHUM} Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature 2026},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.latechclfl-1.6/},
  pages     = {48--65},
  isbn      = {979-8-89176-373-9},
  abstract  = {Named Entity Recognition has emerged as a critical task in natural language processing, particularly for extracting meaningful information from unstructured text. Although traditional approaches rely heavily on large annotated datasets, recent advances have explored weak supervision techniques to address the limitations of resource-intensive annotation processes. Historical texts provide unique challenges to this task because of their linguistic peculiarities, and several approaches exist to address texts of this domain in a supervised way, but they involve lengthy manual annotations of the documents of interest by domain experts. To address this issue, this paper explores how recent weakly supervised NER techniques can be adapted to historical texts, analyzing their suitability for this domain. The experiments show that domain-specific architectures can be effectively trained on low-resource corpora with weak supervision over a small set of entity labels. Using only 10{\%} of the annotations, the performance of these architectures remains above 80{\%} of the supervised quality in terms of F1-Score.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID is repeated below as the "citekey" identifier. -->
  <mods ID="sorbi-etal-2026-weakly-supervised">
    <titleInfo>
      <title>Weakly Supervised Named Entity Recognition for Historical Texts</title>
    </titleInfo>

    <!-- Authors of the paper, one <name> element each. -->
    <name type="personal">
      <namePart type="given">Marco</namePart>
      <namePart type="family">Sorbi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Laurent</namePart>
      <namePart type="family">Moccozet</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Stephane</namePart>
      <namePart type="family">Marchand-Maillet</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 10th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Diego</namePart>
        <namePart type="family">Alves</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yuri</namePart>
        <namePart type="family">Bizzoni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stefania</namePart>
        <namePart type="family">Degaetano-Ortlieb</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Kazantseva</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Janis</namePart>
        <namePart type="family">Pagel</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stan</namePart>
        <namePart type="family">Szpakowicz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-373-9</identifier>
    </relatedItem>

    <abstract>Named Entity Recognition has emerged as a critical task in natural language processing, particularly for extracting meaningful information from unstructured text. Although traditional approaches rely heavily on large annotated datasets, recent advances have explored weak supervision techniques to address the limitations of resource-intensive annotation processes. Historical texts provide unique challenges to this task because of their linguistic peculiarities, and several approaches exist to address texts of this domain in a supervised way, but they involve lengthy manual annotations of the documents of interest by domain experts. To address this issue, this paper explores how recent weakly supervised NER techniques can be adapted to historical texts, analyzing their suitability for this domain. The experiments show that domain-specific architectures can be effectively trained on low-resource corpora with weak supervision over a small set of entity labels. Using only 10% of the annotations, the performance of these architectures remains above 80% of the supervised quality in terms of F1-Score.</abstract>
    <identifier type="citekey">sorbi-etal-2026-weakly-supervised</identifier>
    <location>
      <url>https://aclanthology.org/2026.latechclfl-1.6/</url>
    </location>

    <!-- Position of the paper inside the host volume: issue date and page range. -->
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>48</start>
        <end>65</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Weakly Supervised Named Entity Recognition for Historical Texts
%A Sorbi, Marco
%A Moccozet, Laurent
%A Marchand-Maillet, Stephane
%Y Alves, Diego
%Y Bizzoni, Yuri
%Y Degaetano-Ortlieb, Stefania
%Y Kazantseva, Anna
%Y Pagel, Janis
%Y Szpakowicz, Stan
%S Proceedings of the 10th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature 2026
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-373-9
%F sorbi-etal-2026-weakly-supervised
%X Named Entity Recognition has emerged as a critical task in natural language processing, particularly for extracting meaningful information from unstructured text. Although traditional approaches rely heavily on large annotated datasets, recent advances have explored weak supervision techniques to address the limitations of resource-intensive annotation processes. Historical texts provide unique challenges to this task because of their linguistic peculiarities, and several approaches exist to address texts of this domain in a supervised way, but they involve lengthy manual annotations of the documents of interest by domain experts. To address this issue, this paper explores how recent weakly supervised NER techniques can be adapted to historical texts, analyzing their suitability for this domain. The experiments show that domain-specific architectures can be effectively trained on low-resource corpora with weak supervision over a small set of entity labels. Using only 10% of the annotations, the performance of these architectures remains above 80% of the supervised quality in terms of F1-Score.
%U https://aclanthology.org/2026.latechclfl-1.6/
%P 48-65
Markdown (Informal)
[Weakly Supervised Named Entity Recognition for Historical Texts](https://aclanthology.org/2026.latechclfl-1.6/) (Sorbi et al., LaTeCH-CLfL 2026)
ACL
- Marco Sorbi, Laurent Moccozet, and Stephane Marchand-Maillet. 2026. Weakly Supervised Named Entity Recognition for Historical Texts. In Proceedings of the 10th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature 2026, pages 48–65, Rabat, Morocco. Association for Computational Linguistics.