@inproceedings{anuradha-etal-2025-holobert,
title = "{H}olo{BERT}: Pre-Trained Transformer Model for Historical Narratives",
author = "Anuradha, Isuri and
Ha, Le An and
Mitkov, Ruslan",
editor = "Angelova, Galia and
Kunilovskaya, Maria and
Escribe, Marie and
Mitkov, Ruslan",
booktitle = "Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era",
month = sep,
year = "2025",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2025.ranlp-1.12/",
pages = "105--110",
abstract = "Oral texts often contain spontaneous, unstructured language with features like disfluencies, colloquialisms, and non-standard syntax. In this paper, we investigate how further pretraining language models with specialised learning objectives for oral and transcribed texts to enhance Named Entity Recognition (NER) performance in Holocaust-related discourse. To evaluate our models, we compare the extracted named entities (NE) against those from other pretrained models on historical texts and generative AI models such as GPT. Furthermore, we demonstrate practical applications of the recognised NEs by linking them to a knowledge base as structured metadata and representing them in a graph format. With these contributions, our work illustrates how the further-pretrain-and-fine-tune paradigm in Natural Language Processing advances research in Digital Humanities."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="anuradha-etal-2025-holobert">
<titleInfo>
<title>HoloBERT: Pre-Trained Transformer Model for Historical Narratives</title>
</titleInfo>
<name type="personal">
<namePart type="given">Isuri</namePart>
<namePart type="family">Anuradha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Le</namePart>
<namePart type="given">An</namePart>
<namePart type="family">Ha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
</titleInfo>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Kunilovskaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie</namePart>
<namePart type="family">Escribe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Oral texts often contain spontaneous, unstructured language with features like disfluencies, colloquialisms, and non-standard syntax. In this paper, we investigate how further pretraining language models with specialised learning objectives for oral and transcribed texts to enhance Named Entity Recognition (NER) performance in Holocaust-related discourse. To evaluate our models, we compare the extracted named entities (NE) against those from other pretrained models on historical texts and generative AI models such as GPT. Furthermore, we demonstrate practical applications of the recognised NEs by linking them to a knowledge base as structured metadata and representing them in a graph format. With these contributions, our work illustrates how the further-pretrain-and-fine-tune paradigm in Natural Language Processing advances research in Digital Humanities.</abstract>
<identifier type="citekey">anuradha-etal-2025-holobert</identifier>
<location>
<url>https://aclanthology.org/2025.ranlp-1.12/</url>
</location>
<part>
<date>2025-09</date>
<extent unit="page">
<start>105</start>
<end>110</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T HoloBERT: Pre-Trained Transformer Model for Historical Narratives
%A Anuradha, Isuri
%A Ha, Le An
%A Mitkov, Ruslan
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F anuradha-etal-2025-holobert
%X Oral texts often contain spontaneous, unstructured language with features like disfluencies, colloquialisms, and non-standard syntax. In this paper, we investigate how further pretraining language models with specialised learning objectives for oral and transcribed texts can enhance Named Entity Recognition (NER) performance in Holocaust-related discourse. To evaluate our models, we compare the extracted named entities (NE) against those from other models pretrained on historical texts and from generative AI models such as GPT. Furthermore, we demonstrate practical applications of the recognised NEs by linking them to a knowledge base as structured metadata and representing them in a graph format. With these contributions, our work illustrates how the further-pretrain-and-fine-tune paradigm in Natural Language Processing advances research in Digital Humanities.
%U https://aclanthology.org/2025.ranlp-1.12/
%P 105-110
Markdown (Informal)
[HoloBERT: Pre-Trained Transformer Model for Historical Narratives](https://aclanthology.org/2025.ranlp-1.12/) (Anuradha et al., RANLP 2025)
ACL
Isuri Anuradha, Le An Ha, and Ruslan Mitkov. 2025. HoloBERT: Pre-Trained Transformer Model for Historical Narratives. In Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era, pages 105–110, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.
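
The abstract names a "further-pretrain-and-fine-tune" paradigm: continued masked-language-model pretraining on oral/transcribed text, followed by NER fine-tuning. Below is a minimal sketch of that paradigm using Hugging Face Transformers; it is not the authors' actual setup. The corpus file transcripts.txt, the output directory holobert-mlm, the base checkpoint choice, and the NER label set are all illustrative assumptions.

```python
from datasets import load_dataset
from transformers import (
    AutoModelForMaskedLM,
    AutoModelForTokenClassification,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

# Stage 1: continued masked-language-model pretraining on transcribed speech.
# "transcripts.txt" is a hypothetical plain-text corpus, one utterance per line.
corpus = load_dataset("text", data_files={"train": "transcripts.txt"})
tokenized = corpus.map(
    lambda batch: tokenizer(batch["text"], truncation=True, max_length=128),
    batched=True,
    remove_columns=["text"],
)

trainer = Trainer(
    model=AutoModelForMaskedLM.from_pretrained("bert-base-cased"),
    args=TrainingArguments(output_dir="holobert-mlm", num_train_epochs=1),
    train_dataset=tokenized["train"],
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm_probability=0.15),
)
trainer.train()
trainer.save_model("holobert-mlm")
tokenizer.save_pretrained("holobert-mlm")

# Stage 2: reuse the adapted encoder for token classification (NER).
# The tag inventory here is an assumption; the paper's actual labels may differ.
labels = ["O", "B-PER", "I-PER", "B-LOC", "I-LOC", "B-ORG", "I-ORG"]
ner_model = AutoModelForTokenClassification.from_pretrained(
    "holobert-mlm",
    num_labels=len(labels),
    id2label=dict(enumerate(labels)),
    label2id={label: i for i, label in enumerate(labels)},
)
# Fine-tuning then follows the same Trainer pattern, with token-labelled
# testimony data and DataCollatorForTokenClassification.
```

The abstract also describes linking recognised entities to a knowledge base and representing them as a graph. One plausible (but again assumed) realisation uses the public Wikidata search API and networkx; the entity strings and document id below are invented examples of NER output, not data from the paper.

```python
import networkx as nx
import requests

def link_to_wikidata(surface_form: str) -> str | None:
    """Return the top-ranked Wikidata QID for a surface form, or None."""
    resp = requests.get(
        "https://www.wikidata.org/w/api.php",
        params={
            "action": "wbsearchentities",
            "search": surface_form,
            "language": "en",
            "format": "json",
        },
        timeout=10,
    )
    hits = resp.json().get("search", [])
    return hits[0]["id"] if hits else None

# Connect a (hypothetical) testimony document to the KB ids of its entities.
graph = nx.Graph()
for entity in ["Auschwitz", "Warsaw"]:  # example NER output
    qid = link_to_wikidata(entity)
    if qid:
        graph.add_edge("testimony-001", qid, surface_form=entity)
```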