@inproceedings{nacar-etal-2025-towards,
title = "Towards Inclusive {A}rabic {LLM}s: A Culturally Aligned Benchmark in {A}rabic Large Language Model Evaluation",
author = "Nacar, Omer and
Sibaee, Serry Taiseer and
Ahmed, Samar and
Ben Atitallah, Safa and
Ammar, Adel and
Alhabashi, Yasser and
Al-Batati, Abdulrahman S. and
Alsehibani, Arwa and
Qandos, Nour and
Elshehy, Omar and
Abdelkader, Mohamed and
Koubaa, Anis",
editor = "Hettiarachchi, Hansi and
Ranasinghe, Tharindu and
Rayson, Paul and
Mitkov, Ruslan and
Gaber, Mohamed and
Premasiri, Damith and
Tan, Fiona Anting and
Uyangodage, Lasitha",
booktitle = "Proceedings of the First Workshop on Language Models for Low-Resource Languages",
month = jan,
year = "2025",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.loreslm-1.29/",
pages = "387--401",
abstract = "Arabic Large Language Models are usually evaluated using Western-centric benchmarks that overlook essential cultural contexts, making them less effective and culturally misaligned for Arabic-speaking communities. This study addresses this gap by evaluating the Arabic Massive Multitask Language Understanding (MMLU) Benchmark to assess its cultural alignment and relevance for Arabic Large Language Models (LLMs) across culturally sensitive topics. A team of eleven experts annotated over 2,500 questions, evaluating them based on fluency, adequacy, cultural appropriateness, bias detection, religious sensitivity, and adherence to social norms. Through human assessment, the study highlights significant cultural misalignments and biases, particularly in sensitive areas like religion and morality. In response to these findings, we propose annotation guidelines and integrate culturally enriched data sources to enhance the benchmark`s reliability and relevance. The research highlights the importance of cultural sensitivity in evaluating inclusive Arabic LLMs, fostering more widely accepted LLMs for Arabic-speaking communities."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nacar-etal-2025-towards">
<titleInfo>
<title>Towards Inclusive Arabic LLMs: A Culturally Aligned Benchmark in Arabic Large Language Model Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Omer</namePart>
<namePart type="family">Nacar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Serry</namePart>
<namePart type="given">Taiseer</namePart>
<namePart type="family">Sibaee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samar</namePart>
<namePart type="family">Ahmed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Safa</namePart>
<namePart type="family">Ben Atitallah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adel</namePart>
<namePart type="family">Ammar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yasser</namePart>
<namePart type="family">Alhabashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdulrahman</namePart>
<namePart type="given">S</namePart>
<namePart type="family">Al-Batati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arwa</namePart>
<namePart type="family">Alsehibani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nour</namePart>
<namePart type="family">Qandos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Omar</namePart>
<namePart type="family">Elshehy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Abdelkader</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anis</namePart>
<namePart type="family">Koubaa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Language Models for Low-Resource Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hansi</namePart>
<namePart type="family">Hettiarachchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tharindu</namePart>
<namePart type="family">Ranasinghe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Rayson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Gaber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Damith</namePart>
<namePart type="family">Premasiri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fiona</namePart>
<namePart type="given">Anting</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lasitha</namePart>
<namePart type="family">Uyangodage</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Arabic Large Language Models are usually evaluated using Western-centric benchmarks that overlook essential cultural contexts, making them less effective and culturally misaligned for Arabic-speaking communities. This study addresses this gap by evaluating the Arabic Massive Multitask Language Understanding (MMLU) Benchmark to assess its cultural alignment and relevance for Arabic Large Language Models (LLMs) across culturally sensitive topics. A team of eleven experts annotated over 2,500 questions, evaluating them based on fluency, adequacy, cultural appropriateness, bias detection, religious sensitivity, and adherence to social norms. Through human assessment, the study highlights significant cultural misalignments and biases, particularly in sensitive areas like religion and morality. In response to these findings, we propose annotation guidelines and integrate culturally enriched data sources to enhance the benchmark's reliability and relevance. The research highlights the importance of cultural sensitivity in evaluating inclusive Arabic LLMs, fostering more widely accepted LLMs for Arabic-speaking communities.</abstract>
<identifier type="citekey">nacar-etal-2025-towards</identifier>
<location>
<url>https://aclanthology.org/2025.loreslm-1.29/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>387</start>
<end>401</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Inclusive Arabic LLMs: A Culturally Aligned Benchmark in Arabic Large Language Model Evaluation
%A Nacar, Omer
%A Sibaee, Serry Taiseer
%A Ahmed, Samar
%A Ben Atitallah, Safa
%A Ammar, Adel
%A Alhabashi, Yasser
%A Al-Batati, Abdulrahman S.
%A Alsehibani, Arwa
%A Qandos, Nour
%A Elshehy, Omar
%A Abdelkader, Mohamed
%A Koubaa, Anis
%Y Hettiarachchi, Hansi
%Y Ranasinghe, Tharindu
%Y Rayson, Paul
%Y Mitkov, Ruslan
%Y Gaber, Mohamed
%Y Premasiri, Damith
%Y Tan, Fiona Anting
%Y Uyangodage, Lasitha
%S Proceedings of the First Workshop on Language Models for Low-Resource Languages
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F nacar-etal-2025-towards
%X Arabic Large Language Models are usually evaluated using Western-centric benchmarks that overlook essential cultural contexts, making them less effective and culturally misaligned for Arabic-speaking communities. This study addresses this gap by evaluating the Arabic Massive Multitask Language Understanding (MMLU) Benchmark to assess its cultural alignment and relevance for Arabic Large Language Models (LLMs) across culturally sensitive topics. A team of eleven experts annotated over 2,500 questions, evaluating them based on fluency, adequacy, cultural appropriateness, bias detection, religious sensitivity, and adherence to social norms. Through human assessment, the study highlights significant cultural misalignments and biases, particularly in sensitive areas like religion and morality. In response to these findings, we propose annotation guidelines and integrate culturally enriched data sources to enhance the benchmark's reliability and relevance. The research highlights the importance of cultural sensitivity in evaluating inclusive Arabic LLMs, fostering more widely accepted LLMs for Arabic-speaking communities.
%U https://aclanthology.org/2025.loreslm-1.29/
%P 387-401
Markdown (Informal)
[Towards Inclusive Arabic LLMs: A Culturally Aligned Benchmark in Arabic Large Language Model Evaluation](https://aclanthology.org/2025.loreslm-1.29/) (Nacar et al., LoResLM 2025)
ACL
- Omer Nacar, Serry Taiseer Sibaee, Samar Ahmed, Safa Ben Atitallah, Adel Ammar, Yasser Alhabashi, Abdulrahman S. Al-Batati, Arwa Alsehibani, Nour Qandos, Omar Elshehy, Mohamed Abdelkader, and Anis Koubaa. 2025. Towards Inclusive Arabic LLMs: A Culturally Aligned Benchmark in Arabic Large Language Model Evaluation. In Proceedings of the First Workshop on Language Models for Low-Resource Languages, pages 387–401, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.