@inproceedings{habash-etal-2025-guidelines,
title = "Guidelines for Fine-grained Sentence-level {A}rabic Readability Annotation",
author = "Habash, Nizar and
Taha-Thomure, Hanada and
Elmadani, Khalid N. and
Zeino, Zeina and
Abushmaes, Abdallah",
editor = "Peng, Siyao and
Rehbein, Ines",
booktitle = "Proceedings of the 19th Linguistic Annotation Workshop (LAW-XIX-2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.law-1.30/",
doi = "10.18653/v1/2025.law-1.30",
pages = "359--376",
ISBN = "979-8-89176-262-6",
abstract = "This paper presents the annotation guidelines of the Balanced Arabic Readability Evaluation Corpus (BAREC), a large-scale resource for fine-grained sentence-level readability assessment in Arabic. BAREC includes 69,441 sentences (1M+ words) labeled across 19 levels, from kindergarten to postgraduate. Based on the Taha/Arabi21 framework, the guidelines were refined through iterative training with native Arabic-speaking educators. We highlight key linguistic, pedagogical, and cognitive factors in determining readability and report high inter-annotator agreement: Quadratic Weighted Kappa 81.8{\%} (substantial/excellent agreement) in the last annotation phase. We also benchmark automatic readability models across multiple classification granularities (19-, 7-, 5-, and 3-level). The corpus and guidelines are publicly available: http://barec.camel-lab.com."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="habash-etal-2025-guidelines">
<titleInfo>
<title>Guidelines for Fine-grained Sentence-level Arabic Readability Annotation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nizar</namePart>
<namePart type="family">Habash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hanada</namePart>
<namePart type="family">Taha-Thomure</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="given">N</namePart>
<namePart type="family">Elmadani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zeina</namePart>
<namePart type="family">Zeino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdallah</namePart>
<namePart type="family">Abushmaes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Linguistic Annotation Workshop (LAW-XIX-2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Siyao</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ines</namePart>
<namePart type="family">Rehbein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-262-6</identifier>
</relatedItem>
<abstract>This paper presents the annotation guidelines of the Balanced Arabic Readability Evaluation Corpus (BAREC), a large-scale resource for fine-grained sentence-level readability assessment in Arabic. BAREC includes 69,441 sentences (1M+ words) labeled across 19 levels, from kindergarten to postgraduate. Based on the Taha/Arabi21 framework, the guidelines were refined through iterative training with native Arabic-speaking educators. We highlight key linguistic, pedagogical, and cognitive factors in determining readability and report high inter-annotator agreement: Quadratic Weighted Kappa 81.8% (substantial/excellent agreement) in the last annotation phase. We also benchmark automatic readability models across multiple classification granularities (19-, 7-, 5-, and 3-level). The corpus and guidelines are publicly available: http://barec.camel-lab.com.</abstract>
<identifier type="citekey">habash-etal-2025-guidelines</identifier>
<identifier type="doi">10.18653/v1/2025.law-1.30</identifier>
<location>
<url>https://aclanthology.org/2025.law-1.30/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>359</start>
<end>376</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Guidelines for Fine-grained Sentence-level Arabic Readability Annotation
%A Habash, Nizar
%A Taha-Thomure, Hanada
%A Elmadani, Khalid N.
%A Zeino, Zeina
%A Abushmaes, Abdallah
%Y Peng, Siyao
%Y Rehbein, Ines
%S Proceedings of the 19th Linguistic Annotation Workshop (LAW-XIX-2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-262-6
%F habash-etal-2025-guidelines
%X This paper presents the annotation guidelines of the Balanced Arabic Readability Evaluation Corpus (BAREC), a large-scale resource for fine-grained sentence-level readability assessment in Arabic. BAREC includes 69,441 sentences (1M+ words) labeled across 19 levels, from kindergarten to postgraduate. Based on the Taha/Arabi21 framework, the guidelines were refined through iterative training with native Arabic-speaking educators. We highlight key linguistic, pedagogical, and cognitive factors in determining readability and report high inter-annotator agreement: Quadratic Weighted Kappa 81.8% (substantial/excellent agreement) in the last annotation phase. We also benchmark automatic readability models across multiple classification granularities (19-, 7-, 5-, and 3-level). The corpus and guidelines are publicly available: http://barec.camel-lab.com.
%R 10.18653/v1/2025.law-1.30
%U https://aclanthology.org/2025.law-1.30/
%U https://doi.org/10.18653/v1/2025.law-1.30
%P 359-376
Markdown (Informal)
[Guidelines for Fine-grained Sentence-level Arabic Readability Annotation](https://aclanthology.org/2025.law-1.30/) (Habash et al., LAW 2025)
ACL