@inproceedings{bilgin-tasdemir-ozates-2025-nakbatr,
title = "{N}akba{TR}: A {T}urkish {NER} Dataset for Nakba Narratives",
author = {Bilgin Tasdemir, Esma Fat{\i}ma and
{\"O}zate{\c{s}}, {\c{S}}aziye Bet{\"u}l},
editor = "Jarrar, Mustafa and
Habash, Nizar and
El-Haj, Mo",
booktitle = "Proceedings of the first International Workshop on Nakba Narratives as Language Resources",
month = jan,
year = "2025",
address = "Abu Dhabi",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.nakbanlp-1.13/",
pages = "122--126",
abstract = "This paper introduces a novel, annotated Named Entity Recognition (NER) dataset derived from a collection of 181 news articles about the Nakba and its witnesses. Given their prominence as a primary source of information on the Nakba in Turkish, news articles were selected as the primary data source. Some 4,032 news sentences are collected from web sites of two news agencies, Anadolu Ajans{\i} and TRTHaber. We applied a filtering process to make sure that only the news which contain witness testimonies regarding the ongoing Nakba are included in the dataset. After a semi-automatic annotation for entities of type Person, Location, and Organization, we obtained a NER dataset of 2,289 PERSON, 5,875 LOCATION, and 1,299 ORGANIZATION tags. We expect the dataset to be useful in several NLP tasks such as sentiment analysis and relation extraction for Nakba event while providing a new language resource for Turkish. As a future work, we aim to improve the dataset by increasing the number of news and entity types."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bilgin-tasdemir-ozates-2025-nakbatr">
<titleInfo>
<title>NakbaTR: A Turkish NER Dataset for Nakba Narratives</title>
</titleInfo>
<name type="personal">
<namePart type="given">Esma</namePart>
<namePart type="given">Fatıma</namePart>
<namePart type="family">Bilgin Tasdemir</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Şaziye</namePart>
<namePart type="given">Betül</namePart>
<namePart type="family">Özateş</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the first International Workshop on Nakba Narratives as Language Resources</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mustafa</namePart>
<namePart type="family">Jarrar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nizar</namePart>
<namePart type="family">Habash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mo</namePart>
<namePart type="family">El-Haj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper introduces a novel, annotated Named Entity Recognition (NER) dataset derived from a collection of 181 news articles about the Nakba and its witnesses. Given their prominence as a primary source of information on the Nakba in Turkish, news articles were selected as the primary data source. Some 4,032 news sentences are collected from web sites of two news agencies, Anadolu Ajansı and TRTHaber. We applied a filtering process to make sure that only the news which contain witness testimonies regarding the ongoing Nakba are included in the dataset. After a semi-automatic annotation for entities of type Person, Location, and Organization, we obtained a NER dataset of 2,289 PERSON, 5,875 LOCATION, and 1,299 ORGANIZATION tags. We expect the dataset to be useful in several NLP tasks such as sentiment analysis and relation extraction for Nakba event while providing a new language resource for Turkish. As a future work, we aim to improve the dataset by increasing the number of news and entity types.</abstract>
<identifier type="citekey">bilgin-tasdemir-ozates-2025-nakbatr</identifier>
<location>
<url>https://aclanthology.org/2025.nakbanlp-1.13/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>122</start>
<end>126</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T NakbaTR: A Turkish NER Dataset for Nakba Narratives
%A Bilgin Tasdemir, Esma Fatıma
%A Özateş, Şaziye Betül
%Y Jarrar, Mustafa
%Y Habash, Nizar
%Y El-Haj, Mo
%S Proceedings of the first International Workshop on Nakba Narratives as Language Resources
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi
%F bilgin-tasdemir-ozates-2025-nakbatr
%X This paper introduces a novel, annotated Named Entity Recognition (NER) dataset derived from a collection of 181 news articles about the Nakba and its witnesses. Given their prominence as a primary source of information on the Nakba in Turkish, news articles were selected as the primary data source. Some 4,032 news sentences are collected from web sites of two news agencies, Anadolu Ajansı and TRTHaber. We applied a filtering process to make sure that only the news which contain witness testimonies regarding the ongoing Nakba are included in the dataset. After a semi-automatic annotation for entities of type Person, Location, and Organization, we obtained a NER dataset of 2,289 PERSON, 5,875 LOCATION, and 1,299 ORGANIZATION tags. We expect the dataset to be useful in several NLP tasks such as sentiment analysis and relation extraction for Nakba event while providing a new language resource for Turkish. As a future work, we aim to improve the dataset by increasing the number of news and entity types.
%U https://aclanthology.org/2025.nakbanlp-1.13/
%P 122-126
Markdown (Informal)
[NakbaTR: A Turkish NER Dataset for Nakba Narratives](https://aclanthology.org/2025.nakbanlp-1.13/) (Bilgin Tasdemir & Özateş, NakbaNLP 2025)
ACL
- Esma Fatıma Bilgin Tasdemir and Şaziye Betül Özateş. 2025. NakbaTR: A Turkish NER Dataset for Nakba Narratives. In Proceedings of the first International Workshop on Nakba Narratives as Language Resources, pages 122–126, Abu Dhabi. Association for Computational Linguistics.