@inproceedings{alghamdi-etal-2026-knowledge,
  title     = {A Knowledge Graph Based Diagnostic Framework for Analyzing Hallucinations in {A}rabic Machine Reading Comprehension},
  author    = {AlGhamdi, Najwa Abdullah and
               Al-Azani, Sadam and
               Nuamah, Kwabena and
               Bundy, Alan},
  booktitle = {Proceedings of the 2nd Workshop on {NLP} for Languages Using {A}rabic Script},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.abjadnlp-1.49/},
  pages     = {413--421},
  abstract  = {Large Language Models (LLMs) frequently generate answers that are fluent but not fully grounded in the provided context, a phenomenon commonly referred to as hallucination. While recent work has explored hallucination detection primarily in English and open domain settings, comparatively little attention has been given to Arabic machine reading comprehension (MRC), particularly in culturally sensitive domains such as Qur{'}anic texts. In this paper, we present a knowledge graph based diagnostic framework for analyzing hallucinations and question misalignment in Arabic MRC. Rather than proposing a new detection model or metric, the framework provides an interpretable, triple level analysis of model generated answers by comparing subject-relation-object representations derived from the passage, the question, and the answer. The approach incorporates question-aware filtering and operates under weak supervision, combining automatic analysis with targeted human adjudication to handle annotation gaps and semantic ambiguity. We apply the framework to the Qur{'}anic Reading Comprehension Dataset (QRCD) and demonstrate how it exposes systematic hallucination patterns that are difficult to capture using surface level similarity metrics alone, particularly for questions requiring justification or abstract interpretation. The results highlight the value of structured, transparent diagnostic evaluation for understanding LLM behavior in low resource and high stakes Arabic NLP settings.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alghamdi-etal-2026-knowledge">
<titleInfo>
<title>A Knowledge Graph Based Diagnostic Framework for Analyzing Hallucinations in Arabic Machine Reading Comprehension</title>
</titleInfo>
<name type="personal">
<namePart type="given">Najwa</namePart>
<namePart type="given">Abdullah</namePart>
<namePart type="family">AlGhamdi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sadam</namePart>
<namePart type="family">Al-Azani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kwabena</namePart>
<namePart type="family">Nuamah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Bundy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Large Language Models (LLMs) frequently generate answers that are fluent but not fully grounded in the provided context, a phenomenon commonly referred to as hallucination. While recent work has explored hallucination detection primarily in English and open domain settings, comparatively little attention has been given to Arabic machine reading comprehension (MRC), particularly in culturally sensitive domains such as Qur’anic texts. In this paper, we present a knowledge graph based diagnostic framework for analyzing hallucinations and question misalignment in Arabic MRC. Rather than proposing a new detection model or metric, the framework provides an interpretable, triple level analysis of model generated answers by comparing subject-relation-object representations derived from the passage, the question, and the answer. The approach incorporates question-aware filtering and operates under weak supervision, combining automatic analysis with targeted human adjudication to handle annotation gaps and semantic ambiguity. We apply the framework to the Qur’anic Reading Comprehension Dataset (QRCD) and demonstrate how it exposes systematic hallucination patterns that are difficult to capture using surface level similarity metrics alone, particularly for questions requiring justification or abstract interpretation. The results highlight the value of structured, transparent diagnostic evaluation for understanding LLM behavior in low resource and high stakes Arabic NLP settings.</abstract>
<identifier type="citekey">alghamdi-etal-2026-knowledge</identifier>
<location>
<url>https://aclanthology.org/2026.abjadnlp-1.49/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>413</start>
<end>421</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Knowledge Graph Based Diagnostic Framework for Analyzing Hallucinations in Arabic Machine Reading Comprehension
%A AlGhamdi, Najwa Abdullah
%A Al-Azani, Sadam
%A Nuamah, Kwabena
%A Bundy, Alan
%S Proceedings of the 2nd Workshop on NLP for Languages Using Arabic Script
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%F alghamdi-etal-2026-knowledge
%X Large Language Models (LLMs) frequently generate answers that are fluent but not fully grounded in the provided context, a phenomenon commonly referred to as hallucination. While recent work has explored hallucination detection primarily in English and open domain settings, comparatively little attention has been given to Arabic machine reading comprehension (MRC), particularly in culturally sensitive domains such as Qur’anic texts. In this paper, we present a knowledge graph based diagnostic framework for analyzing hallucinations and question misalignment in Arabic MRC. Rather than proposing a new detection model or metric, the framework provides an interpretable, triple level analysis of model generated answers by comparing subject-relation-object representations derived from the passage, the question, and the answer. The approach incorporates question-aware filtering and operates under weak supervision, combining automatic analysis with targeted human adjudication to handle annotation gaps and semantic ambiguity. We apply the framework to the Qur’anic Reading Comprehension Dataset (QRCD) and demonstrate how it exposes systematic hallucination patterns that are difficult to capture using surface level similarity metrics alone, particularly for questions requiring justification or abstract interpretation. The results highlight the value of structured, transparent diagnostic evaluation for understanding LLM behavior in low resource and high stakes Arabic NLP settings.
%U https://aclanthology.org/2026.abjadnlp-1.49/
%P 413-421
Markdown (Informal)
[A Knowledge Graph Based Diagnostic Framework for Analyzing Hallucinations in Arabic Machine Reading Comprehension](https://aclanthology.org/2026.abjadnlp-1.49/) (AlGhamdi et al., AbjadNLP 2026)
ACL