@inproceedings{liqreina-etal-2023-arabic,
title = "{A}rabic Fine-Grained Entity Recognition",
author = "Liqreina, Haneen and
Jarrar, Mustafa and
Khalilia, Mohammed and
El-Shangiti, Ahmed and
Abdul-Mageed, Muhammad",
editor = "Sawaf, Hassan and
El-Beltagy, Samhaa and
Zaghouani, Wajdi and
Magdy, Walid and
Abdelali, Ahmed and
Tomeh, Nadi and
Abu Farha, Ibrahim and
Habash, Nizar and
Khalifa, Salam and
Keleg, Amr and
Haddad, Hatem and
Zitouni, Imed and
Mrini, Khalil and
Almatham, Rawan",
booktitle = "Proceedings of ArabicNLP 2023",
month = dec,
year = "2023",
address = "Singapore (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.arabicnlp-1.25",
doi = "10.18653/v1/2023.arabicnlp-1.25",
pages = "310--323",
abstract = "Traditional NER systems are typically trained to recognize coarse-grained categories of entities, and less attention is given to classifying entities into a hierarchy of fine-grained lower-level sub-types. This article aims to advance Arabic NER with fine-grained entities. We chose to extend Wojood (an open-source Nested Arabic Named Entity Corpus) with sub-types. In particular, four main entity types in Wojood (geopolitical entity (GPE), location (LOC), organization (ORG), and facility (FAC) are extended with 31 sub-types of entities. To do this, we first revised Wojood{'}s annotations of GPE, LOC, ORG, and FAC to be compatible with the LDC{'}s ACE guidelines, which yielded 5, 614 changes. Second, all mentions of GPE, LOC, ORG, and FAC ({\textasciitilde} 44K) in Wojood are manually annotated with the LDC{'}s ACE subtypes. This extended version of Wojood is called WojoodFine. To evaluate our annotations, we measured the inter-annotator agreement (IAA) using both Cohen{'}s Kappa and F1 score, resulting in 0.9861 and 0.9889, respectively. To compute the baselines of WojoodFine, we fine-tune three pre-trained Arabic BERT encoders in three settings: flat NER, nested NER and nested NER with sub-types and achieved F1 score of 0.920, 0.866, and 0.885, respectively. Our corpus and models are open source and available at https://sina.birzeit.edu/wojood/.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="liqreina-etal-2023-arabic">
<titleInfo>
<title>Arabic Fine-Grained Entity Recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Haneen</namePart>
<namePart type="family">Liqreina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mustafa</namePart>
<namePart type="family">Jarrar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammed</namePart>
<namePart type="family">Khalilia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">El-Shangiti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Muhammad</namePart>
<namePart type="family">Abdul-Mageed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of ArabicNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hassan</namePart>
<namePart type="family">Sawaf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samhaa</namePart>
<namePart type="family">El-Beltagy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wajdi</namePart>
<namePart type="family">Zaghouani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Walid</namePart>
<namePart type="family">Magdy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Abdelali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadi</namePart>
<namePart type="family">Tomeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ibrahim</namePart>
<namePart type="family">Abu Farha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nizar</namePart>
<namePart type="family">Habash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salam</namePart>
<namePart type="family">Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amr</namePart>
<namePart type="family">Keleg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hatem</namePart>
<namePart type="family">Haddad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Imed</namePart>
<namePart type="family">Zitouni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalil</namePart>
<namePart type="family">Mrini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rawan</namePart>
<namePart type="family">Almatham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Traditional NER systems are typically trained to recognize coarse-grained categories of entities, and less attention is given to classifying entities into a hierarchy of fine-grained lower-level sub-types. This article aims to advance Arabic NER with fine-grained entities. We chose to extend Wojood (an open-source Nested Arabic Named Entity Corpus) with sub-types. In particular, four main entity types in Wojood (geopolitical entity (GPE), location (LOC), organization (ORG), and facility (FAC) are extended with 31 sub-types of entities. To do this, we first revised Wojood’s annotations of GPE, LOC, ORG, and FAC to be compatible with the LDC’s ACE guidelines, which yielded 5, 614 changes. Second, all mentions of GPE, LOC, ORG, and FAC (~ 44K) in Wojood are manually annotated with the LDC’s ACE subtypes. This extended version of Wojood is called WojoodFine. To evaluate our annotations, we measured the inter-annotator agreement (IAA) using both Cohen’s Kappa and F1 score, resulting in 0.9861 and 0.9889, respectively. To compute the baselines of WojoodFine, we fine-tune three pre-trained Arabic BERT encoders in three settings: flat NER, nested NER and nested NER with sub-types and achieved F1 score of 0.920, 0.866, and 0.885, respectively. Our corpus and models are open source and available at https://sina.birzeit.edu/wojood/.</abstract>
<identifier type="citekey">liqreina-etal-2023-arabic</identifier>
<identifier type="doi">10.18653/v1/2023.arabicnlp-1.25</identifier>
<location>
<url>https://aclanthology.org/2023.arabicnlp-1.25</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>310</start>
<end>323</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Arabic Fine-Grained Entity Recognition
%A Liqreina, Haneen
%A Jarrar, Mustafa
%A Khalilia, Mohammed
%A El-Shangiti, Ahmed
%A Abdul-Mageed, Muhammad
%Y Sawaf, Hassan
%Y El-Beltagy, Samhaa
%Y Zaghouani, Wajdi
%Y Magdy, Walid
%Y Abdelali, Ahmed
%Y Tomeh, Nadi
%Y Abu Farha, Ibrahim
%Y Habash, Nizar
%Y Khalifa, Salam
%Y Keleg, Amr
%Y Haddad, Hatem
%Y Zitouni, Imed
%Y Mrini, Khalil
%Y Almatham, Rawan
%S Proceedings of ArabicNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore (Hybrid)
%F liqreina-etal-2023-arabic
%X Traditional NER systems are typically trained to recognize coarse-grained categories of entities, and less attention is given to classifying entities into a hierarchy of fine-grained lower-level sub-types. This article aims to advance Arabic NER with fine-grained entities. We chose to extend Wojood (an open-source Nested Arabic Named Entity Corpus) with sub-types. In particular, four main entity types in Wojood (geopolitical entity (GPE), location (LOC), organization (ORG), and facility (FAC) are extended with 31 sub-types of entities. To do this, we first revised Wojood’s annotations of GPE, LOC, ORG, and FAC to be compatible with the LDC’s ACE guidelines, which yielded 5, 614 changes. Second, all mentions of GPE, LOC, ORG, and FAC (~ 44K) in Wojood are manually annotated with the LDC’s ACE subtypes. This extended version of Wojood is called WojoodFine. To evaluate our annotations, we measured the inter-annotator agreement (IAA) using both Cohen’s Kappa and F1 score, resulting in 0.9861 and 0.9889, respectively. To compute the baselines of WojoodFine, we fine-tune three pre-trained Arabic BERT encoders in three settings: flat NER, nested NER and nested NER with sub-types and achieved F1 score of 0.920, 0.866, and 0.885, respectively. Our corpus and models are open source and available at https://sina.birzeit.edu/wojood/.
%R 10.18653/v1/2023.arabicnlp-1.25
%U https://aclanthology.org/2023.arabicnlp-1.25
%U https://doi.org/10.18653/v1/2023.arabicnlp-1.25
%P 310-323
Markdown (Informal)
[Arabic Fine-Grained Entity Recognition](https://aclanthology.org/2023.arabicnlp-1.25) (Liqreina et al., ArabicNLP-WS 2023)
ACL
- Haneen Liqreina, Mustafa Jarrar, Mohammed Khalilia, Ahmed El-Shangiti, and Muhammad Abdul-Mageed. 2023. Arabic Fine-Grained Entity Recognition. In Proceedings of ArabicNLP 2023, pages 310–323, Singapore (Hybrid). Association for Computational Linguistics.