BibTeX
@inproceedings{barbu-etal-2025-improving,
title = "Improving {E}stonian Text Simplification through Pretrained Language Models and Custom Datasets",
author = "Barbu, Eduard and
Muru, Meeri-Ly and
Malva, Sten Marcus",
editor = "Angelova, Galia and
Kunilovskaya, Maria and
Escribe, Marie and
Mitkov, Ruslan",
booktitle = "Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era",
month = sep,
year = "2025",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2025.ranlp-1.16/",
pages = "133--142",
abstract = "This paper presents a method for text simplification based on two neural architectures: a neural machine translation (NMT) model and a fine-tuned large language model (LLaMA). Given the scarcity of existing resources for Estonian, a new dataset was created by combining manually translated corpora with GPT-4.0-generated simplifications. OpenNMT was selected as a representative NMT-based system, while LLaMA was fine-tuned on the constructed dataset. Evaluation shows LLaMA outperforms OpenNMT in grammaticality, readability, and meaning preservation. These results underscore the effectiveness of large language models for text simplification in low-resource language settings. The complete dataset, fine-tuning scripts, and evaluation pipeline are provided in a publicly accessible supplementary package to support reproducibility and adaptation to other languages."
}

MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="barbu-etal-2025-improving">
<titleInfo>
<title>Improving Estonian Text Simplification through Pretrained Language Models and Custom Datasets</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eduard</namePart>
<namePart type="family">Barbu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Meeri-Ly</namePart>
<namePart type="family">Muru</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sten</namePart>
<namePart type="given">Marcus</namePart>
<namePart type="family">Malva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
</titleInfo>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Kunilovskaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie</namePart>
<namePart type="family">Escribe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents a method for text simplification based on two neural architectures: a neural machine translation (NMT) model and a fine-tuned large language model (LLaMA). Given the scarcity of existing resources for Estonian, a new dataset was created by combining manually translated corpora with GPT-4.0-generated simplifications. OpenNMT was selected as a representative NMT-based system, while LLaMA was fine-tuned on the constructed dataset. Evaluation shows LLaMA outperforms OpenNMT in grammaticality, readability, and meaning preservation. These results underscore the effectiveness of large language models for text simplification in low-resource language settings. The complete dataset, fine-tuning scripts, and evaluation pipeline are provided in a publicly accessible supplementary package to support reproducibility and adaptation to other languages.</abstract>
<identifier type="citekey">barbu-etal-2025-improving</identifier>
<location>
<url>https://aclanthology.org/2025.ranlp-1.16/</url>
</location>
<part>
<date>2025-09</date>
<extent unit="page">
<start>133</start>
<end>142</end>
</extent>
</part>
</mods>
</modsCollection>

Endnote
%0 Conference Proceedings
%T Improving Estonian Text Simplification through Pretrained Language Models and Custom Datasets
%A Barbu, Eduard
%A Muru, Meeri-Ly
%A Malva, Sten Marcus
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F barbu-etal-2025-improving
%X This paper presents a method for text simplification based on two neural architectures: a neural machine translation (NMT) model and a fine-tuned large language model (LLaMA). Given the scarcity of existing resources for Estonian, a new dataset was created by combining manually translated corpora with GPT-4.0-generated simplifications. OpenNMT was selected as a representative NMT-based system, while LLaMA was fine-tuned on the constructed dataset. Evaluation shows LLaMA outperforms OpenNMT in grammaticality, readability, and meaning preservation. These results underscore the effectiveness of large language models for text simplification in low-resource language settings. The complete dataset, fine-tuning scripts, and evaluation pipeline are provided in a publicly accessible supplementary package to support reproducibility and adaptation to other languages.
%U https://aclanthology.org/2025.ranlp-1.16/
%P 133-142

Markdown (Informal)
[Improving Estonian Text Simplification through Pretrained Language Models and Custom Datasets](https://aclanthology.org/2025.ranlp-1.16/) (Barbu et al., RANLP 2025)

ACL
Eduard Barbu, Meeri-Ly Muru, and Sten Marcus Malva. 2025. [Improving Estonian Text Simplification through Pretrained Language Models and Custom Datasets](https://aclanthology.org/2025.ranlp-1.16/). In Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era, pages 133–142, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.