% BibTeX record for a workshop paper. Proper nouns in the title are brace-protected
% as whole words so that sentence-casing styles keep their capitals.
% NOTE(review): second editor's given name entered as "Nizar" (the export had the
% family name duplicated) -- confirm against the proceedings front matter.
@inproceedings{hamed-zaidkilani-2025-arabic,
    title     = {{Arabic} Topic Classification Corpus of the {Nakba} Short Stories},
    author    = {Hamed, Osama and
                 Zaidkilani, Nadeem},
    editor    = {Jarrar, Mustafa and
                 Habash, Nizar and
                 El-Haj, Mo},
    booktitle = {Proceedings of the first International Workshop on Nakba Narratives as Language Resources},
    month     = jan,
    year      = {2025},
    address   = {Abu Dhabi},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.nakbanlp-1.6/},
    pages     = {48--55},
    abstract  = {In this paper, we enrich Arabic Natural Language Processing (NLP) resources by introducing the {\textquotedblleft}Nakba Topic Classification Corpus (NTCC),{\textquotedblright} a novel annotated Arabic corpus derived from narratives about the Nakba. The NTCC comprises approximately 470 sentences extracted from eight short stories and captures the thematic depth of the Nakba narratives, providing insights into both historical and personal dimensions. The corpus was annotated in a two-step process. One third of the dataset was manually annotated, achieving an IAA of 87{\%} (later resolved to 100{\%}), while the rest was annotated using a rule-based system based on thematic patterns. This approach ensures consistency and reproducibility, enhancing the corpus's reliability for NLP research. The NTCC contributes to the preservation of the Palestinian cultural heritage while addressing key challenges in Arabic NLP, such as data scarcity and linguistic complexity. By like topic modeling and classification tasks, the NTCC offers a valuable resource for advancing Arabic NLP research and fostering a deeper understanding of the Nakba narratives.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record for a workshop paper. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="hamed-zaidkilani-2025-arabic">
    <titleInfo>
      <title>Arabic Topic Classification Corpus of the Nakba Short Stories</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Osama</namePart>
      <namePart type="family">Hamed</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nadeem</namePart>
      <namePart type="family">Zaidkilani</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-01</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the first International Workshop on Nakba Narratives as Language Resources</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Mustafa</namePart>
        <namePart type="family">Jarrar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <!-- NOTE(review): given name entered as "Nizar"; the export had the family
           name duplicated here. Confirm against the proceedings front matter. -->
      <name type="personal">
        <namePart type="given">Nizar</namePart>
        <namePart type="family">Habash</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mo</namePart>
        <namePart type="family">El-Haj</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Abu Dhabi</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper, we enrich Arabic Natural Language Processing (NLP) resources by introducing the “Nakba Topic Classification Corpus (NTCC),” a novel annotated Arabic corpus derived from narratives about the Nakba. The NTCC comprises approximately 470 sentences extracted from eight short stories and captures the thematic depth of the Nakba narratives, providing insights into both historical and personal dimensions. The corpus was annotated in a two-step process. One third of the dataset was manually annotated, achieving an IAA of 87% (later resolved to 100%), while the rest was annotated using a rule-based system based on thematic patterns. This approach ensures consistency and reproducibility, enhancing the corpus’s reliability for NLP research. The NTCC contributes to the preservation of the Palestinian cultural heritage while addressing key challenges in Arabic NLP, such as data scarcity and linguistic complexity. By like topic modeling and classification tasks, the NTCC offers a valuable resource for advancing Arabic NLP research and fostering a deeper understanding of the Nakba narratives.</abstract>
    <identifier type="citekey">hamed-zaidkilani-2025-arabic</identifier>
    <location>
      <url>https://aclanthology.org/2025.nakbanlp-1.6/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2025-01</date>
      <extent unit="page">
        <start>48</start>
        <end>55</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Arabic Topic Classification Corpus of the Nakba Short Stories
%A Hamed, Osama
%A Zaidkilani, Nadeem
%Y Jarrar, Mustafa
%Y Habash, Nizar
%Y El-Haj, Mo
%S Proceedings of the first International Workshop on Nakba Narratives as Language Resources
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi
%F hamed-zaidkilani-2025-arabic
%X In this paper, we enrich Arabic Natural Language Processing (NLP) resources by introducing the “Nakba Topic Classification Corpus (NTCC),” a novel annotated Arabic corpus derived from narratives about the Nakba. The NTCC comprises approximately 470 sentences extracted from eight short stories and captures the thematic depth of the Nakba narratives, providing insights into both historical and personal dimensions. The corpus was annotated in a two-step process. One third of the dataset was manually annotated, achieving an IAA of 87% (later resolved to 100%), while the rest was annotated using a rule-based system based on thematic patterns. This approach ensures consistency and reproducibility, enhancing the corpus’s reliability for NLP research. The NTCC contributes to the preservation of the Palestinian cultural heritage while addressing key challenges in Arabic NLP, such as data scarcity and linguistic complexity. By like topic modeling and classification tasks, the NTCC offers a valuable resource for advancing Arabic NLP research and fostering a deeper understanding of the Nakba narratives.
%U https://aclanthology.org/2025.nakbanlp-1.6/
%P 48-55
Markdown (Informal)
[Arabic Topic Classification Corpus of the Nakba Short Stories](https://aclanthology.org/2025.nakbanlp-1.6/) (Hamed & Zaidkilani, NakbaNLP 2025)
ACL