% ACL Anthology record 2026.nlp4dh-1.16 (NLP4DH 2026 proceedings paper).
% Title words whose capitalisation must survive sentence-casing styles are
% protected with whole-word braces (not single letters), so kerning and
% hyphenation inside the word are not disturbed.
@inproceedings{tabuzo-etal-2026-phmartiallawner,
  title = "{PHMartialLawNER}: A {Tagalog} Named Entity Recognition Corpus for the {Philippine} Martial Law Era",
  author = "Tabuzo, Abdiel Clarence and
    Velazco, Vladimir Gray and
    Cabral, Cassandra and
    Lacsam, Moneah Shaila and
    Ponay, Charmaine Salvador",
  editor = {Hamilton, Sil and
    {\"O}hman, Emily and
    Hicke, Rebecca M. M. and
    Bizzoni, Yuri and
    Bax, Axel and
    Matthews, Jacob A. and
    H{\"a}m{\"a}l{\"a}inen, Mika},
  booktitle = "Proceedings of the 6th International Conference on Natural Language Processing for the Digital Humanities",
  month = jul,
  year = "2026",
  address = "San Diego, USA",
  publisher = "Association for Computational Linguistics",
  url = "https://aclanthology.org/2026.nlp4dh-1.16/",
  pages = "167--177",
  isbn = "979-8-89176-427-9",
  abstract = "Historical corpora for Tagalog remain limited, particularly texts produced during the Martial Law period under the dictatorship of Ferdinand Marcos Sr. (1972{--}1986). Much of this material remains undigitized, restricting computational analysis of a significant period in Philippine political history. To support research on historical Tagalog texts, we introduce PHMartialLawNER, a gold-standard named entity recognition corpus constructed from newspapers and underground publications of the Martial Law era. The corpus includes approximately 13k extracted sentence segments (362,000 tokens), consolidated into 8k annotated text spans through a semi-automatic pipeline with manual validation. The reliability of the annotation is measured using Cohen{'}s $\kappa$, reaching 0.86 on all tokens and 0.72 on annotated tokens, with a pairwise F1-score of 0.74. The schema defines historically relevant entity categories including Person (Individual, Collective), Organization (Political, Government, Other), Event (Local, International), Production (Media, Government, Doctrine), as well as Time, Numerical Statistics, Location, and Object entities, specifically identifying weapon artifacts. We establish baseline performance using GLiNER variants, calamanCy models, and transformer-based architectures under zero-shot and few-shot settings. The PHMartialLawNER corpus will be publicly released to support Tagalog NLP, historical text processing, and digital humanities research."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tabuzo-etal-2026-phmartiallawner">
<titleInfo>
<title>PHMartialLawNER: A Tagalog Named Entity Recognition Corpus for the Philippine Martial Law Era</title>
</titleInfo>
<name type="personal">
<namePart type="given">Abdiel</namePart>
<namePart type="given">Clarence</namePart>
<namePart type="family">Tabuzo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vladimir</namePart>
<namePart type="given">Gray</namePart>
<namePart type="family">Velazco</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cassandra</namePart>
<namePart type="family">Cabral</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Moneah</namePart>
<namePart type="given">Shaila</namePart>
<namePart type="family">Lacsam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Charmaine</namePart>
<namePart type="given">Salvador</namePart>
<namePart type="family">Ponay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th International Conference on Natural Language Processing for the Digital Humanities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sil</namePart>
<namePart type="family">Hamilton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Öhman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rebecca</namePart>
<namePart type="given">M</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Hicke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuri</namePart>
<namePart type="family">Bizzoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Axel</namePart>
<namePart type="family">Bax</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jacob</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Matthews</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-427-9</identifier>
</relatedItem>
<abstract>Historical corpora for Tagalog remain limited, particularly texts produced during the Martial Law period under the dictatorship of Ferdinand Marcos Sr. (1972–1986). Much of this material remains undigitized, restricting computational analysis of a significant period in Philippine political history. To support research on historical Tagalog texts, we introduce PHMartialLawNER, a gold-standard named entity recognition corpus constructed from newspapers and underground publications of the Martial Law era. The corpus includes approximately 13k extracted sentence segments (362,000 tokens), consolidated into 8k annotated text spans through a semi-automatic pipeline with manual validation. The reliability of the annotation is measured using Cohen’s κ, reaching 0.86 on all tokens and 0.72 on annotated tokens, with a pairwise F1-score of 0.74. The schema defines historically relevant entity categories including Person (Individual, Collective), Organization (Political, Government, Other), Event (Local, International), Production (Media, Government, Doctrine), as well as Time, Numerical Statistics, Location, and Object entities, specifically identifying weapon artifacts. We establish baseline performance using GLiNER variants, calamanCy models, and transformer-based architectures under zero-shot and few-shot settings. The PHMartialLawNER corpus will be publicly released to support Tagalog NLP, historical text processing, and digital humanities research.</abstract>
<identifier type="citekey">tabuzo-etal-2026-phmartiallawner</identifier>
<location>
<url>https://aclanthology.org/2026.nlp4dh-1.16/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>167</start>
<end>177</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T PHMartialLawNER: A Tagalog Named Entity Recognition Corpus for the Philippine Martial Law Era
%A Tabuzo, Abdiel Clarence
%A Velazco, Vladimir Gray
%A Cabral, Cassandra
%A Lacsam, Moneah Shaila
%A Ponay, Charmaine Salvador
%Y Hamilton, Sil
%Y Öhman, Emily
%Y Hicke, Rebecca M. M.
%Y Bizzoni, Yuri
%Y Bax, Axel
%Y Matthews, Jacob A.
%Y Hämäläinen, Mika
%S Proceedings of the 6th International Conference on Natural Language Processing for the Digital Humanities
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, USA
%@ 979-8-89176-427-9
%F tabuzo-etal-2026-phmartiallawner
%X Historical corpora for Tagalog remain limited, particularly texts produced during the Martial Law period under the dictatorship of Ferdinand Marcos Sr. (1972–1986). Much of this material remains undigitized, restricting computational analysis of a significant period in Philippine political history. To support research on historical Tagalog texts, we introduce PHMartialLawNER, a gold-standard named entity recognition corpus constructed from newspapers and underground publications of the Martial Law era. The corpus includes approximately 13k extracted sentence segments (362,000 tokens), consolidated into 8k annotated text spans through a semi-automatic pipeline with manual validation. The reliability of the annotation is measured using Cohen’s κ, reaching 0.86 on all tokens and 0.72 on annotated tokens, with a pairwise F1-score of 0.74. The schema defines historically relevant entity categories including Person (Individual, Collective), Organization (Political, Government, Other), Event (Local, International), Production (Media, Government, Doctrine), as well as Time, Numerical Statistics, Location, and Object entities, specifically identifying weapon artifacts. We establish baseline performance using GLiNER variants, calamanCy models, and transformer-based architectures under zero-shot and few-shot settings. The PHMartialLawNER corpus will be publicly released to support Tagalog NLP, historical text processing, and digital humanities research.
%U https://aclanthology.org/2026.nlp4dh-1.16/
%P 167-177
Markdown (Informal)
[PHMartialLawNER: A Tagalog Named Entity Recognition Corpus for the Philippine Martial Law Era](https://aclanthology.org/2026.nlp4dh-1.16/) (Tabuzo et al., NLP4DH 2026)
ACL