@inproceedings{bahaj-ghogho-2026-mizanqa,
    title = {{M}izan{QA}: A Benchmark for Multi-Answer {M}oroccan Legal {QA}},
    author = {Bahaj, Adil and
      Ghogho, Mounir},
    editor = {Matusevych, Yevgen and
      Eryi{\u{g}}it, G{\"u}l{\c{s}}en and
      Aletras, Nikolaos},
    booktitle = {Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 5: Industry Track)},
    month = mar,
    year = {2026},
    address = {Rabat, Morocco},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2026.eacl-industry.10/},
    pages = {132--144},
    isbn = {979-8-89176-384-5},
    abstract = {We present MizanQA, a benchmark for assessing LLMs on Moroccan legal MCQs, many with multiple correct answers. Covering 1,776 expert-verified questions in Modern Standard Arabic enriched with Moroccan idioms, the dataset reflects influences from Maliki jurisprudence, customary law, and French legal traditions. Unlike single-answer settings, MizanQA features variable option counts, creating added difficulty. We evaluate multilingual and Arabic-centric models in zero-shot, native-Arabic prompts, measuring accuracy, a precision-penalized F1-like score, and calibration errors. Results show large performance gaps and miscalibration, particularly under stricter penalties. By scoping this benchmark to parametric knowledge only, we provide a baseline for future retrieval-augmented and rationale-focused setups.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bahaj-ghogho-2026-mizanqa">
<titleInfo>
<title>MizanQA: A Benchmark for Multi-Answer Moroccan Legal QA</title>
</titleInfo>
<name type="personal">
<namePart type="given">Adil</namePart>
<namePart type="family">Bahaj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mounir</namePart>
<namePart type="family">Ghogho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 5: Industry Track)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yevgen</namePart>
<namePart type="family">Matusevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gülşen</namePart>
<namePart type="family">Eryiğit</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikolaos</namePart>
<namePart type="family">Aletras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-384-5</identifier>
</relatedItem>
<abstract>We present MizanQA, a benchmark for assessing LLMs on Moroccan legal MCQs, many with multiple correct answers. Covering 1,776 expert-verified questions in Modern Standard Arabic enriched with Moroccan idioms, the dataset reflects influences from Maliki jurisprudence, customary law, and French legal traditions. Unlike single-answer settings, MizanQA features variable option counts, creating added difficulty. We evaluate multilingual and Arabic-centric models in zero-shot, native-Arabic prompts, measuring accuracy, a precision-penalized F1-like score, and calibration errors. Results show large performance gaps and miscalibration, particularly under stricter penalties. By scoping this benchmark to parametric knowledge only, we provide a baseline for future retrieval-augmented and rationale-focused setups.</abstract>
<identifier type="citekey">bahaj-ghogho-2026-mizanqa</identifier>
<location>
<url>https://aclanthology.org/2026.eacl-industry.10/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>132</start>
<end>144</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MizanQA: A Benchmark for Multi-Answer Moroccan Legal QA
%A Bahaj, Adil
%A Ghogho, Mounir
%Y Matusevych, Yevgen
%Y Eryiğit, Gülşen
%Y Aletras, Nikolaos
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 5: Industry Track)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-384-5
%F bahaj-ghogho-2026-mizanqa
%X We present MizanQA, a benchmark for assessing LLMs on Moroccan legal MCQs, many with multiple correct answers. Covering 1,776 expert-verified questions in Modern Standard Arabic enriched with Moroccan idioms, the dataset reflects influences from Maliki jurisprudence, customary law, and French legal traditions. Unlike single-answer settings, MizanQA features variable option counts, creating added difficulty. We evaluate multilingual and Arabic-centric models in zero-shot, native-Arabic prompts, measuring accuracy, a precision-penalized F1-like score, and calibration errors. Results show large performance gaps and miscalibration, particularly under stricter penalties. By scoping this benchmark to parametric knowledge only, we provide a baseline for future retrieval-augmented and rationale-focused setups.
%U https://aclanthology.org/2026.eacl-industry.10/
%P 132-144
Markdown (Informal)
[MizanQA: A Benchmark for Multi-Answer Moroccan Legal QA](https://aclanthology.org/2026.eacl-industry.10/) (Bahaj & Ghogho, EACL 2026)
ACL
- Adil Bahaj and Mounir Ghogho. 2026. MizanQA: A Benchmark for Multi-Answer Moroccan Legal QA. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 5: Industry Track), pages 132–144, Rabat, Morocco. Association for Computational Linguistics.