@inproceedings{bouchekif-etal-2025-assessing,
title = "Assessing Large Language Models on Islamic Legal Reasoning: Evidence from Inheritance Law Evaluation",
author = "Bouchekif, Abdessalam and
Rashwani, Samer and
Sbahi, Heba and
Gaben, Shahd and
Al Khatib, Mutaz and
Ghaly, Mohammed",
editor = "Darwish, Kareem and
Ali, Ahmed and
Abu Farha, Ibrahim and
Touileb, Samia and
Zitouni, Imed and
Abdelali, Ahmed and
Al-Ghamdi, Sharefah and
Alkhereyf, Sakhar and
Zaghouani, Wajdi and
Khalifa, Salam and
AlKhamissi, Badr and
Almatham, Rawan and
Hamed, Injy and
Alyafeai, Zaid and
Alowisheq, Areeb and
Inoue, Go and
Mrini, Khalil and
Alshammari, Waad",
booktitle = "Proceedings of The Third Arabic Natural Language Processing Conference",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.arabicnlp-main.20/",
pages = "246--257",
ISBN = "979-8-89176-352-4",
abstract = "This paper evaluates the knowledge and reasoning capabilities of Large Language Models in Islamic inheritance law, ʿilm al-maw{\={a}}r{\={i}}th. We assess the performance of seven LLMs using a benchmark of 1,000 multiple-choice questions covering diverse inheritance scenarios, designed to test each model{'}s ability{---}from understanding the inheritance context to computing the distribution of shares prescribed by Islamic jurisprudence. The results show a wide performance gap among models. o3 and Gemini 2.5 achieved accuracies above 90{\%}, while ALLaM, Fanar, LLaMA, and Mistral scored below 50{\%}. These disparities reflect important differences in reasoning ability and domain adaptation.We conduct a detailed error analysis to identify recurring failure patterns across models, including misunderstandings of inheritance scenarios, incorrect application of legal rules, and insufficient domain knowledge. Our findings highlight the limitations of current models in handling structured legal reasoning and suggest directions for improving their performance in Islamic legal reasoning."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bouchekif-etal-2025-assessing">
<titleInfo>
<title>Assessing Large Language Models on Islamic Legal Reasoning: Evidence from Inheritance Law Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Abdessalam</namePart>
<namePart type="family">Bouchekif</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samer</namePart>
<namePart type="family">Rashwani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heba</namePart>
<namePart type="family">Sbahi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shahd</namePart>
<namePart type="family">Gaben</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mutaz</namePart>
<namePart type="family">Al Khatib</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammed</namePart>
<namePart type="family">Ghaly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of The Third Arabic Natural Language Processing Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kareem</namePart>
<namePart type="family">Darwish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Ali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ibrahim</namePart>
<namePart type="family">Abu Farha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samia</namePart>
<namePart type="family">Touileb</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Imed</namePart>
<namePart type="family">Zitouni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Abdelali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sharefah</namePart>
<namePart type="family">Al-Ghamdi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakhar</namePart>
<namePart type="family">Alkhereyf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wajdi</namePart>
<namePart type="family">Zaghouani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salam</namePart>
<namePart type="family">Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Badr</namePart>
<namePart type="family">AlKhamissi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rawan</namePart>
<namePart type="family">Almatham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Injy</namePart>
<namePart type="family">Hamed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zaid</namePart>
<namePart type="family">Alyafeai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Areeb</namePart>
<namePart type="family">Alowisheq</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Go</namePart>
<namePart type="family">Inoue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalil</namePart>
<namePart type="family">Mrini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Waad</namePart>
<namePart type="family">Alshammari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-352-4</identifier>
</relatedItem>
<abstract>This paper evaluates the knowledge and reasoning capabilities of Large Language Models in Islamic inheritance law, ʿilm al-mawārīth. We assess the performance of seven LLMs using a benchmark of 1,000 multiple-choice questions covering diverse inheritance scenarios, designed to test each model’s ability—from understanding the inheritance context to computing the distribution of shares prescribed by Islamic jurisprudence. The results show a wide performance gap among models. o3 and Gemini 2.5 achieved accuracies above 90%, while ALLaM, Fanar, LLaMA, and Mistral scored below 50%. These disparities reflect important differences in reasoning ability and domain adaptation.We conduct a detailed error analysis to identify recurring failure patterns across models, including misunderstandings of inheritance scenarios, incorrect application of legal rules, and insufficient domain knowledge. Our findings highlight the limitations of current models in handling structured legal reasoning and suggest directions for improving their performance in Islamic legal reasoning.</abstract>
<identifier type="citekey">bouchekif-etal-2025-assessing</identifier>
<location>
<url>https://aclanthology.org/2025.arabicnlp-main.20/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>246</start>
<end>257</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Assessing Large Language Models on Islamic Legal Reasoning: Evidence from Inheritance Law Evaluation
%A Bouchekif, Abdessalam
%A Rashwani, Samer
%A Sbahi, Heba
%A Gaben, Shahd
%A Al Khatib, Mutaz
%A Ghaly, Mohammed
%Y Darwish, Kareem
%Y Ali, Ahmed
%Y Abu Farha, Ibrahim
%Y Touileb, Samia
%Y Zitouni, Imed
%Y Abdelali, Ahmed
%Y Al-Ghamdi, Sharefah
%Y Alkhereyf, Sakhar
%Y Zaghouani, Wajdi
%Y Khalifa, Salam
%Y AlKhamissi, Badr
%Y Almatham, Rawan
%Y Hamed, Injy
%Y Alyafeai, Zaid
%Y Alowisheq, Areeb
%Y Inoue, Go
%Y Mrini, Khalil
%Y Alshammari, Waad
%S Proceedings of The Third Arabic Natural Language Processing Conference
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-352-4
%F bouchekif-etal-2025-assessing
%X This paper evaluates the knowledge and reasoning capabilities of Large Language Models in Islamic inheritance law, ʿilm al-mawārīth. We assess the performance of seven LLMs using a benchmark of 1,000 multiple-choice questions covering diverse inheritance scenarios, designed to test each model’s ability—from understanding the inheritance context to computing the distribution of shares prescribed by Islamic jurisprudence. The results show a wide performance gap among models. o3 and Gemini 2.5 achieved accuracies above 90%, while ALLaM, Fanar, LLaMA, and Mistral scored below 50%. These disparities reflect important differences in reasoning ability and domain adaptation.We conduct a detailed error analysis to identify recurring failure patterns across models, including misunderstandings of inheritance scenarios, incorrect application of legal rules, and insufficient domain knowledge. Our findings highlight the limitations of current models in handling structured legal reasoning and suggest directions for improving their performance in Islamic legal reasoning.
%U https://aclanthology.org/2025.arabicnlp-main.20/
%P 246-257
Markdown (Informal)
[Assessing Large Language Models on Islamic Legal Reasoning: Evidence from Inheritance Law Evaluation](https://aclanthology.org/2025.arabicnlp-main.20/) (Bouchekif et al., ArabicNLP 2025)
ACL