% ACL Anthology export for a RANLP 2025 conference paper (anthology id 2025.ranlp-1.29).
% Proper nouns and acronyms in title fields are brace-protected as whole words so
% that sentence-casing bibliography styles keep their capitalisation.
% Field values are brace-delimited; the month uses the predefined three-letter macro.
@inproceedings{chitez-etal-2025-integrating,
  title     = {Integrating Archaic and Regional Lexicons to Improve the Readability of {Old} {Romanian} Texts},
  author    = {Chitez, Madalina and
               Rogobete, Roxana and
               Udrea, Cristina Aura and
               Cs{\"u}r{\"o}s, Karla and
               Bucur, Ana-Maria and
               Dascalu, Mihai},
  editor    = {Angelova, Galia and
               Kunilovskaya, Maria and
               Escribe, Marie and
               Mitkov, Ruslan},
  booktitle = {Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative {AI} Era},
  month     = sep,
  year      = {2025},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd., Shoumen, Bulgaria},
  url       = {https://aclanthology.org/2025.ranlp-1.29/},
  pages     = {240--246},
  abstract  = {Access to age-appropriate texts is critical for young readers' literacy acquisition. For limited-resourced languages, such as Romanian, this area remains under-researched. As such, we present ongoing work on improving readability for old Romanian texts by applying Large Language Models (LLMs). First, we compiled and cleaned a comprehensive list of archaic and regional terms from lexicographic sources, including DEX online and printed dictionaries. The cleaning process involved duplicate removal, orthographic normalization, context-based filtering, and manual review. Key challenges included distinguishing archaic forms from rare or poetic ones, resolving polysemous entries, and managing inconsistent labeling across sources. Second, LLMs were utilized to validate the archaic and regional nature of identified terms and replace them with modern equivalents, while also determining the appropriate reading level for both original and modified versions. Results show that through the replacement of archaic and regional terms, the appropriate age for the modified texts decreases by approximately 0.5 years for texts extracted from textbooks and canonical writings.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the conference paper with citekey chitez-etal-2025-integrating. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="chitez-etal-2025-integrating">
    <titleInfo>
      <title>Integrating Archaic and Regional Lexicons to Improve the Readability of Old Romanian Texts</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Madalina</namePart>
      <namePart type="family">Chitez</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Roxana</namePart>
      <namePart type="family">Rogobete</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Cristina</namePart>
      <namePart type="given">Aura</namePart>
      <namePart type="family">Udrea</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Karla</namePart>
      <namePart type="family">Csürös</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ana-Maria</namePart>
      <namePart type="family">Bucur</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mihai</namePart>
      <namePart type="family">Dascalu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher details. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Kunilovskaya</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marie</namePart>
        <namePart type="family">Escribe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Access to age-appropriate texts is critical for young readers’ literacy acquisition. For limited-resourced languages, such as Romanian, this area remains under-researched. As such, we present ongoing work on improving readability for old Romanian texts by applying Large Language Models (LLMs). First, we compiled and cleaned a comprehensive list of archaic and regional terms from lexicographic sources, including DEX online and printed dictionaries. The cleaning process involved duplicate removal, orthographic normalization, context-based filtering, and manual review. Key challenges included distinguishing archaic forms from rare or poetic ones, resolving polysemous entries, and managing inconsistent labeling across sources. Second, LLMs were utilized to validate the archaic and regional nature of identified terms and replace them with modern equivalents, while also determining the appropriate reading level for both original and modified versions. Results show that through the replacement of archaic and regional terms, the appropriate age for the modified texts decreases by approximately 0.5 years for texts extracted from textbooks and canonical writings.</abstract>
    <identifier type="citekey">chitez-etal-2025-integrating</identifier>
    <location>
      <url>https://aclanthology.org/2025.ranlp-1.29/</url>
    </location>
    <!-- Position of the paper inside the host volume. -->
    <part>
      <date>2025-09</date>
      <extent unit="page">
        <start>240</start>
        <end>246</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Integrating Archaic and Regional Lexicons to Improve the Readability of Old Romanian Texts
%A Chitez, Madalina
%A Rogobete, Roxana
%A Udrea, Cristina Aura
%A Csürös, Karla
%A Bucur, Ana-Maria
%A Dascalu, Mihai
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F chitez-etal-2025-integrating
%X Access to age-appropriate texts is critical for young readers’ literacy acquisition. For limited-resourced languages, such as Romanian, this area remains under-researched. As such, we present ongoing work on improving readability for old Romanian texts by applying Large Language Models (LLMs). First, we compiled and cleaned a comprehensive list of archaic and regional terms from lexicographic sources, including DEX online and printed dictionaries. The cleaning process involved duplicate removal, orthographic normalization, context-based filtering, and manual review. Key challenges included distinguishing archaic forms from rare or poetic ones, resolving polysemous entries, and managing inconsistent labeling across sources. Second, LLMs were utilized to validate the archaic and regional nature of identified terms and replace them with modern equivalents, while also determining the appropriate reading level for both original and modified versions. Results show that through the replacement of archaic and regional terms, the appropriate age for the modified texts decreases by approximately 0.5 years for texts extracted from textbooks and canonical writings.
%U https://aclanthology.org/2025.ranlp-1.29/
%P 240-246
Markdown (Informal)
[Integrating Archaic and Regional Lexicons to Improve the Readability of Old Romanian Texts](https://aclanthology.org/2025.ranlp-1.29/) (Chitez et al., RANLP 2025)
ACL
- Madalina Chitez, Roxana Rogobete, Cristina Aura Udrea, Karla Csürös, Ana-Maria Bucur, and Mihai Dascalu. 2025. Integrating Archaic and Regional Lexicons to Improve the Readability of Old Romanian Texts. In Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era, pages 240–246, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.