% Conference paper record; ACL Anthology ID 2025.emnlp-main.1741 (see url/doi below).
% NOTE(review): `address` appears to hold the conference venue rather than the
% publisher's city, as exported by the Anthology -- confirm before normalising.
@inproceedings{aljabari-etal-2025-wojoodrelations,
    title     = {{$\mathrm{Wojood^{Relations}}$}: {Arabic} Relation Extraction Corpus and Modeling},
    author    = {Aljabari, Alaa and
                 Khalilia, Mohammed and
                 Jarrar, Mustafa},
    editor    = {Christodoulopoulos, Christos and
                 Chakraborty, Tanmoy and
                 Rose, Carolyn and
                 Peng, Violet},
    booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
    month     = nov,
    year      = {2025},
    address   = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.emnlp-main.1741/},
    doi       = {10.18653/v1/2025.emnlp-main.1741},
    pages     = {34330--34348},
    isbn      = {979-8-89176-332-6},
    abstract  = {Relation extraction (RE) is a core task in natural language processing, crucial for semantic understanding, knowledge graph construction, and enhancing downstream applications. Existing work on Arabic RE remains limited due to the language{'}s rich morphology and syntactic complexity, and the lack of large, high-quality datasets. In this paper, we present $\mathrm{Wojood^{Relations}}$, the largest and most diverse Arabic RE corpus to date, containing over $33K$ sentences ($\sim550K$ tokens) annotated with $\sim15K$ relation triples across 40 relation types. The corpus is built on top of Wojood NER dataset with manual relation annotations carried out by expert annotators, achieving a Cohen{'}s $\kappa$ of 0.92, indicating high reliability. In addition, we propose two methods: NLI-RE, which formulates RE as a binary natural language inference problem using relation-aware templates, and GPT-Joint, a few-shot LLM framework for joint entity and RE via relation-aware retrieval. Finally, we benchmark the dataset using both supervised models and in-context learning with LLMs. Supervised models achieve 92.89{\%} F1 for RE, while LLMs obtain 72.73{\%} F1 for joint entity and RE. These results establish strong baselines, highlight key challenges, and provide a foundation for advancing Arabic RE research.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="aljabari-etal-2025-wojoodrelations">
<titleInfo>
<title>Wojood^Relations: Arabic Relation Extraction Corpus and Modeling</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alaa</namePart>
<namePart type="family">Aljabari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammed</namePart>
<namePart type="family">Khalilia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mustafa</namePart>
<namePart type="family">Jarrar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-332-6</identifier>
</relatedItem>
<abstract>Relation extraction (RE) is a core task in natural language processing, crucial for semantic understanding, knowledge graph construction, and enhancing downstream applications. Existing work on Arabic RE remains limited due to the language’s rich morphology and syntactic complexity, and the lack of large, high-quality datasets. In this paper, we present Wojood^Relations, the largest and most diverse Arabic RE corpus to date, containing over 33K sentences (~550K tokens) annotated with ~15K relation triples across 40 relation types. The corpus is built on top of Wojood NER dataset with manual relation annotations carried out by expert annotators, achieving a Cohen’s κ of 0.92, indicating high reliability. In addition, we propose two methods: NLI-RE, which formulates RE as a binary natural language inference problem using relation-aware templates, and GPT-Joint, a few-shot LLM framework for joint entity and RE via relation-aware retrieval. Finally, we benchmark the dataset using both supervised models and in-context learning with LLMs. Supervised models achieve 92.89% F1 for RE, while LLMs obtain 72.73% F1 for joint entity and RE. These results establish strong baselines, highlight key challenges, and provide a foundation for advancing Arabic RE research.</abstract>
<identifier type="citekey">aljabari-etal-2025-wojoodrelations</identifier>
<identifier type="doi">10.18653/v1/2025.emnlp-main.1741</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-main.1741/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>34330</start>
<end>34348</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Wojood^Relations: Arabic Relation Extraction Corpus and Modeling
%A Aljabari, Alaa
%A Khalilia, Mohammed
%A Jarrar, Mustafa
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-332-6
%F aljabari-etal-2025-wojoodrelations
%X Relation extraction (RE) is a core task in natural language processing, crucial for semantic understanding, knowledge graph construction, and enhancing downstream applications. Existing work on Arabic RE remains limited due to the language’s rich morphology and syntactic complexity, and the lack of large, high-quality datasets. In this paper, we present Wojood^Relations, the largest and most diverse Arabic RE corpus to date, containing over 33K sentences (~550K tokens) annotated with ~15K relation triples across 40 relation types. The corpus is built on top of Wojood NER dataset with manual relation annotations carried out by expert annotators, achieving a Cohen’s κ of 0.92, indicating high reliability. In addition, we propose two methods: NLI-RE, which formulates RE as a binary natural language inference problem using relation-aware templates, and GPT-Joint, a few-shot LLM framework for joint entity and RE via relation-aware retrieval. Finally, we benchmark the dataset using both supervised models and in-context learning with LLMs. Supervised models achieve 92.89% F1 for RE, while LLMs obtain 72.73% F1 for joint entity and RE. These results establish strong baselines, highlight key challenges, and provide a foundation for advancing Arabic RE research.
%R 10.18653/v1/2025.emnlp-main.1741
%U https://aclanthology.org/2025.emnlp-main.1741/
%U https://doi.org/10.18653/v1/2025.emnlp-main.1741
%P 34330-34348
Markdown (Informal)
[Wojood^Relations: Arabic Relation Extraction Corpus and Modeling](https://aclanthology.org/2025.emnlp-main.1741/) (Aljabari et al., EMNLP 2025)
ACL