@inproceedings{sawalha-etal-2025-masaq,
title = "{MASAQ} Parser: A Fine-grained {M}orpho{S}yntactic Analyzer for the {Q}uran",
author = "Sawalha, Majdi and
Alshargi, Faisal and
Yagi, Sane and
AlShdaifat, Abdallah T. and
Hammo, Bassam",
editor = "Yagi, Sane and
Sawalha, Majdi and
Shawar, Bayan Abu and
AlShdaifat, Abdallah T. and
Abbas, Norhan and
Organizers",
booktitle = "Proceedings of the New Horizons in Computational Linguistics for Religious Texts",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.clrel-1.7/",
pages = "67--75",
abstract = "This paper introduces the Morphological and Syntactical analysis for the Quran text. In this research we have constructed the MASAQ dataset, a comprehensive resource designed to address the scarcity of annotated Quranic Arabic corpora and facilitate the development of advanced Natural Language Processing (NLP) models. The Quran, being a cornerstone of classical Arabic, presents unique challenges for NLP due to its sacred nature and complex linguistic features. MASAQ provides a detailed syntactic and morphological annotation of the entire Quranic text that includes more than 131K morphological entries and 123K instances of syntactic functions, covering a wide range of grammatical roles and relationships. MASAQ's unique features include a comprehensive tagset of 72 syntactic roles, detailed morphological analysis, and context-specific annotations. This dataset is particularly valuable for tasks such as dependency parsing, grammar checking, machine translation, and text summarization. The potential applications of MASAQ are vast, ranging from pedagogical uses in teaching Arabic grammar to developing sophisticated NLP tools. By providing a high-quality, syntactically annotated dataset, MASAQ aims to advance the field of Arabic NLP, enabling more accurate and more efficient language processing tools. The dataset is made available under the Creative Commons Attribution 3.0 License, ensuring compliance with ethical guidelines and respecting the integrity of the Quranic text."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sawalha-etal-2025-masaq">
<titleInfo>
<title>MASAQ Parser: A Fine-grained MorphoSyntactic Analyzer for the Quran</title>
</titleInfo>
<name type="personal">
<namePart type="given">Majdi</namePart>
<namePart type="family">Sawalha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Faisal</namePart>
<namePart type="family">Alshargi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sane</namePart>
<namePart type="family">Yagi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdallah</namePart>
<namePart type="given">T</namePart>
<namePart type="family">AlShdaifat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bassam</namePart>
<namePart type="family">Hammo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the New Horizons in Computational Linguistics for Religious Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sane</namePart>
<namePart type="family">Yagi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Majdi</namePart>
<namePart type="family">Sawalha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bayan</namePart>
<namePart type="given">Abu</namePart>
<namePart type="family">Shawar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdallah</namePart>
<namePart type="given">T</namePart>
<namePart type="family">AlShdaifat</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Norhan</namePart>
<namePart type="family">Abbas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name>
<namePart>Organizers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper introduces the Morphological and Syntactical analysis for the Quran text. In this research we have constructed the MASAQ dataset, a comprehensive resource designed to address the scarcity of annotated Quranic Arabic corpora and facilitate the development of advanced Natural Language Processing (NLP) models. The Quran, being a cornerstone of classical Arabic, presents unique challenges for NLP due to its sacred nature and complex linguistic features. MASAQ provides a detailed syntactic and morphological annotation of the entire Quranic text that includes more than 131K morphological entries and 123K instances of syntactic functions, covering a wide range of grammatical roles and relationships. MASAQ's unique features include a comprehensive tagset of 72 syntactic roles, detailed morphological analysis, and context-specific annotations. This dataset is particularly valuable for tasks such as dependency parsing, grammar checking, machine translation, and text summarization. The potential applications of MASAQ are vast, ranging from pedagogical uses in teaching Arabic grammar to developing sophisticated NLP tools. By providing a high-quality, syntactically annotated dataset, MASAQ aims to advance the field of Arabic NLP, enabling more accurate and more efficient language processing tools. The dataset is made available under the Creative Commons Attribution 3.0 License, ensuring compliance with ethical guidelines and respecting the integrity of the Quranic text.</abstract>
<identifier type="citekey">sawalha-etal-2025-masaq</identifier>
<location>
<url>https://aclanthology.org/2025.clrel-1.7/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>67</start>
<end>75</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MASAQ Parser: A Fine-grained MorphoSyntactic Analyzer for the Quran
%A Sawalha, Majdi
%A Alshargi, Faisal
%A Yagi, Sane
%A AlShdaifat, Abdallah T.
%A Hammo, Bassam
%Y Yagi, Sane
%Y Sawalha, Majdi
%Y Shawar, Bayan Abu
%Y AlShdaifat, Abdallah T.
%Y Abbas, Norhan
%E Organizers
%S Proceedings of the New Horizons in Computational Linguistics for Religious Texts
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F sawalha-etal-2025-masaq
%X This paper introduces the Morphological and Syntactical analysis for the Quran text. In this research we have constructed the MASAQ dataset, a comprehensive resource designed to address the scarcity of annotated Quranic Arabic corpora and facilitate the development of advanced Natural Language Processing (NLP) models. The Quran, being a cornerstone of classical Arabic, presents unique challenges for NLP due to its sacred nature and complex linguistic features. MASAQ provides a detailed syntactic and morphological annotation of the entire Quranic text that includes more than 131K morphological entries and 123K instances of syntactic functions, covering a wide range of grammatical roles and relationships. MASAQ's unique features include a comprehensive tagset of 72 syntactic roles, detailed morphological analysis, and context-specific annotations. This dataset is particularly valuable for tasks such as dependency parsing, grammar checking, machine translation, and text summarization. The potential applications of MASAQ are vast, ranging from pedagogical uses in teaching Arabic grammar to developing sophisticated NLP tools. By providing a high-quality, syntactically annotated dataset, MASAQ aims to advance the field of Arabic NLP, enabling more accurate and more efficient language processing tools. The dataset is made available under the Creative Commons Attribution 3.0 License, ensuring compliance with ethical guidelines and respecting the integrity of the Quranic text.
%U https://aclanthology.org/2025.clrel-1.7/
%P 67-75
Markdown (Informal)
[MASAQ Parser: A Fine-grained MorphoSyntactic Analyzer for the Quran](https://aclanthology.org/2025.clrel-1.7/) (Sawalha et al., CLRel 2025)
ACL