% BibTeX record for aclanthology.org paper 2025.lowresnlp-1.18; MODS XML and EndNote renderings of the same work follow.
@inproceedings{kazakov-etal-2025-towards,
    title     = {Towards Creating a {Bulgarian} Readability Index},
    author    = {Kazakov, Dimitar and
                 Minkov, Stefan and
                 Margova, Ruslana and
                 Temnikova, Irina and
                 Emauilov, Ivo},
    editor    = {Estevanell-Valladares, Ernesto Luis and
                 Picazo-Izquierdo, Alicia and
                 Ranasinghe, Tharindu and
                 Mikaberidze, Besik and
                 Ostermann, Simon and
                 Gurgurov, Daniil and
                 Mueller, Philipp and
                 Borg, Claudia and
                 {\v{S}}imko, Mari{\'a}n},
    booktitle = {Proceedings of the First Workshop on Advancing {NLP} for Low-Resource Languages},
    month     = sep,
    year      = {2025},
    address   = {Varna, Bulgaria},
    publisher = {INCOMA Ltd., Shoumen, Bulgaria},
    url       = {https://aclanthology.org/2025.lowresnlp-1.18/},
    pages     = {192--200},
    abstract  = {Readability assessment plays a crucial role in education and text accessibility. While numerous indices exist for English and have been extended to Romance and Slavic languages, Bulgarian remains under-served in this regard. This paper reviews established readability metrics across these language families, examining their underlying features and modelling methods. We then report the first attempt to develop a readability index for Bulgarian, using end-of-school-year assessment questions and literary works targeted at children of various ages. Key linguistic attributes, namely, word length, sentence length, syllable count, and information content (based on word frequency), were extracted, and their first two statistical moments, mean and variance, were modelled against grade levels using linear and polynomial regression. Results suggest that polynomial models outperform linear ones by capturing non-linear relationships between textual features and perceived difficulty, but may be harder to interpret. This work provides an initial framework for building a reliable readability measure for Bulgarian, with applications in educational text design, adaptive learning, and corpus annotation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kazakov-etal-2025-towards">
<titleInfo>
<title>Towards Creating a Bulgarian Readability Index</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dimitar</namePart>
<namePart type="family">Kazakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefan</namePart>
<namePart type="family">Minkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslana</namePart>
<namePart type="family">Margova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Irina</namePart>
<namePart type="family">Temnikova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivo</namePart>
<namePart type="family">Emauilov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Advancing NLP for Low-Resource Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ernesto</namePart>
<namePart type="given">Luis</namePart>
<namePart type="family">Estevanell-Valladares</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alicia</namePart>
<namePart type="family">Picazo-Izquierdo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tharindu</namePart>
<namePart type="family">Ranasinghe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Besik</namePart>
<namePart type="family">Mikaberidze</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Ostermann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniil</namePart>
<namePart type="family">Gurgurov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Mueller</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Borg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marián</namePart>
<namePart type="family">Šimko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Readability assessment plays a crucial role in education and text accessibility. While numerous indices exist for English and have been extended to Romance and Slavic languages, Bulgarian remains under-served in this regard. This paper reviews established readability metrics across these language families, examining their underlying features and modelling methods. We then report the first attempt to develop a readability index for Bulgarian, using end-of-school-year assessment questions and literary works targeted at children of various ages. Key linguistic attributes, namely, word length, sentence length, syllable count, and information content (based on word frequency), were extracted, and their first two statistical moments, mean and variance, were modelled against grade levels using linear and polynomial regression. Results suggest that polynomial models outperform linear ones by capturing non-linear relationships between textual features and perceived difficulty, but may be harder to interpret. This work provides an initial framework for building a reliable readability measure for Bulgarian, with applications in educational text design, adaptive learning, and corpus annotation.</abstract>
<identifier type="citekey">kazakov-etal-2025-towards</identifier>
<location>
<url>https://aclanthology.org/2025.lowresnlp-1.18/</url>
</location>
<part>
<date>2025-09</date>
<extent unit="page">
<start>192</start>
<end>200</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Creating a Bulgarian Readability Index
%A Kazakov, Dimitar
%A Minkov, Stefan
%A Margova, Ruslana
%A Temnikova, Irina
%A Emauilov, Ivo
%Y Estevanell-Valladares, Ernesto Luis
%Y Picazo-Izquierdo, Alicia
%Y Ranasinghe, Tharindu
%Y Mikaberidze, Besik
%Y Ostermann, Simon
%Y Gurgurov, Daniil
%Y Mueller, Philipp
%Y Borg, Claudia
%Y Šimko, Marián
%S Proceedings of the First Workshop on Advancing NLP for Low-Resource Languages
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F kazakov-etal-2025-towards
%X Readability assessment plays a crucial role in education and text accessibility. While numerous indices exist for English and have been extended to Romance and Slavic languages, Bulgarian remains under-served in this regard. This paper reviews established readability metrics across these language families, examining their underlying features and modelling methods. We then report the first attempt to develop a readability index for Bulgarian, using end-of-school-year assessment questions and literary works targeted at children of various ages. Key linguistic attributes, namely, word length, sentence length, syllable count, and information content (based on word frequency), were extracted, and their first two statistical moments, mean and variance, were modelled against grade levels using linear and polynomial regression. Results suggest that polynomial models outperform linear ones by capturing non-linear relationships between textual features and perceived difficulty, but may be harder to interpret. This work provides an initial framework for building a reliable readability measure for Bulgarian, with applications in educational text design, adaptive learning, and corpus annotation.
%U https://aclanthology.org/2025.lowresnlp-1.18/
%P 192-200
Markdown (Informal)
[Towards Creating a Bulgarian Readability Index](https://aclanthology.org/2025.lowresnlp-1.18/) (Kazakov et al., LowResNLP 2025)
ACL
- Dimitar Kazakov, Stefan Minkov, Ruslana Margova, Irina Temnikova, and Ivo Emauilov. 2025. Towards Creating a Bulgarian Readability Index. In Proceedings of the First Workshop on Advancing NLP for Low-Resource Languages, pages 192–200, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.