@inproceedings{zbrze-z-ny-etal-2026-arabic,
title = "The {A}rabic {B}ible as an Evaluation Tool: The Case Study of the Khal{\={i}}l{\={i}} {A}rabic Dialect",
author = "Zbrze{\.z}ny, Jakub and
Reiter, Ehud and
Zhao, Wei",
editor = "Mahamood, Saad and
Howcroft, David M. and
van Deemter, Kees and
Balloccu, Simone and
Sivaprasad, Adarsa and
Sundararajan, Barkavi and
Bugar{\'i}n Diz, Alberto and
Alonso-Moral, Jose Mar{\'i}a",
booktitle = "Proceedings of the 1st Symposium on Natural Language Generation Evaluations",
month = jun,
year = "2026",
address = "Aberdeen, United Kingdom",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.retroeval-main.4/",
pages = "24--32",
ISBN = "979-8-89176-436-1",
abstract = "The paper presents a fully documented case study of how high-quality data combined with evaluators' expertise can be utilised for conducting basic NLP experiments in the realm of low-resource languages such as local varieties of Colloquial Arabic, and how the Arabic Bible, hitherto underutilised in NLP, can serve as an evaluation tool. Our experiments on one of the rural Palestinian Arabic dialects of al-Khal{\={i}}l / Hebron illustrate two points. On the one hand, popular models are clearly limited in their ability to produce outputs of a high level of dialectal specificity (here: rural area surrounding a major urban centre). On the other hand, they are capable of generating accurate translations from such dialects into Modern Standard Arabic. Thus, the models appear better at understanding dialects than at producing dialects."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zbrze-z-ny-etal-2026-arabic">
<titleInfo>
<title>The Arabic Bible as an Evaluation Tool: The Case Study of the Khalīlī Arabic Dialect</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jakub</namePart>
<namePart type="family">Zbrzeżny</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ehud</namePart>
<namePart type="family">Reiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Symposium on Natural Language Generation Evaluations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saad</namePart>
<namePart type="family">Mahamood</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Howcroft</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kees</namePart>
<namePart type="family">van Deemter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simone</namePart>
<namePart type="family">Balloccu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adarsa</namePart>
<namePart type="family">Sivaprasad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barkavi</namePart>
<namePart type="family">Sundararajan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alberto</namePart>
<namePart type="family">Bugarín Diz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jose</namePart>
<namePart type="given">María</namePart>
<namePart type="family">Alonso-Moral</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Aberdeen, United Kingdom</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-436-1</identifier>
</relatedItem>
<abstract>The paper presents a fully documented case study of how high-quality data combined with evaluators’ expertise can be utilised for conducting basic NLP experiments in the realm of low-resource languages such as local varieties of Colloquial Arabic, and how the Arabic Bible, hitherto underutilised in NLP, can serve as an evaluation tool. Our experiments on one of the rural Palestinian Arabic dialects of al-Khalīl / Hebron illustrate two points. On the one hand, popular models are clearly limited in their ability to produce outputs of a high level of dialectal specificity (here: rural area surrounding a major urban centre). On the other hand, they are capable of generating accurate translations from such dialects into Modern Standard Arabic. Thus, the models appear better at understanding dialects than at producing dialects.</abstract>
<identifier type="citekey">zbrze-z-ny-etal-2026-arabic</identifier>
<location>
<url>https://aclanthology.org/2026.retroeval-main.4/</url>
</location>
<part>
<date>2026-06</date>
<extent unit="page">
<start>24</start>
<end>32</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Arabic Bible as an Evaluation Tool: The Case Study of the Khalīlī Arabic Dialect
%A Zbrzeżny, Jakub
%A Reiter, Ehud
%A Zhao, Wei
%Y Mahamood, Saad
%Y Howcroft, David M.
%Y van Deemter, Kees
%Y Balloccu, Simone
%Y Sivaprasad, Adarsa
%Y Sundararajan, Barkavi
%Y Bugarín Diz, Alberto
%Y Alonso-Moral, Jose María
%S Proceedings of the 1st Symposium on Natural Language Generation Evaluations
%D 2026
%8 June
%I Association for Computational Linguistics
%C Aberdeen, United Kingdom
%@ 979-8-89176-436-1
%F zbrze-z-ny-etal-2026-arabic
%X The paper presents a fully documented case study of how high-quality data combined with evaluators’ expertise can be utilised for conducting basic NLP experiments in the realm of low-resource languages such as local varieties of Colloquial Arabic, and how the Arabic Bible, hitherto underutilised in NLP, can serve as an evaluation tool. Our experiments on one of the rural Palestinian Arabic dialects of al-Khalīl / Hebron illustrate two points. On the one hand, popular models are clearly limited in their ability to produce outputs of a high level of dialectal specificity (here: rural area surrounding a major urban centre). On the other hand, they are capable of generating accurate translations from such dialects into Modern Standard Arabic. Thus, the models appear better at understanding dialects than at producing dialects.
%U https://aclanthology.org/2026.retroeval-main.4/
%P 24-32
Markdown (Informal)
[The Arabic Bible as an Evaluation Tool: The Case Study of the Khalīlī Arabic Dialect](https://aclanthology.org/2026.retroeval-main.4/) (Zbrzeżny et al., RetroEval 2026)
ACL