@inproceedings{barbu-etal-2025-easyjon,
title = "{E}asy{J}on at {TSAR} 2025 Shared Task Evaluation of Automated Text Simplification with {LLM}-as-a-Judge",
author = "Barbu, Paul-Gerhard and
Lipska-Dieck, Adrianna and
Lindner, Lena",
editor = "Shardlow, Matthew and
Alva-Manchego, Fernando and
North, Kai and
Stodden, Regina and
Saggion, Horacio and
Khallaf, Nouran and
Hayakawa, Akio",
booktitle = "Proceedings of the Fourth Workshop on Text Simplification, Accessibility and Readability (TSAR 2025)",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.tsar-1.14/",
pages = "173--182",
ISBN = "979-8-89176-176-6",
abstract = "This paper presents an approach to automated text simplification for CEFR A2 and B1 levels using large language models and prompt engineering. We evaluate seven models across three prompting strategies short, descriptive, and descriptive with examples. A two-round evaluation system using LLM-as-a-Judge and traditional metrics for text simplification determines optimal model-prompt combinations for final submissions. Results demonstrate that descriptive prompts consistently outperform other strategies across all models, achieving 46-65{\%} of first-place rankings. Qwen3 shows superior performance for A2-level simplification, while B1-level results are more balanced across models. The LLM-as-a-Judge evaluation method shows strong alignment with traditional metrics while providing enhanced explainability."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="barbu-etal-2025-easyjon">
<titleInfo>
<title>EasyJon at TSAR 2025 Shared Task Evaluation of Automated Text Simplification with LLM-as-a-Judge</title>
</titleInfo>
<name type="personal">
<namePart type="given">Paul-Gerhard</namePart>
<namePart type="family">Barbu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adrianna</namePart>
<namePart type="family">Lipska-Dieck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lena</namePart>
<namePart type="family">Lindner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Text Simplification, Accessibility and Readability (TSAR 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Shardlow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fernando</namePart>
<namePart type="family">Alva-Manchego</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai</namePart>
<namePart type="family">North</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Regina</namePart>
<namePart type="family">Stodden</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Horacio</namePart>
<namePart type="family">Saggion</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nouran</namePart>
<namePart type="family">Khallaf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Akio</namePart>
<namePart type="family">Hayakawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-176-6</identifier>
</relatedItem>
<abstract>This paper presents an approach to automated text simplification for CEFR A2 and B1 levels using large language models and prompt engineering. We evaluate seven models across three prompting strategies: short, descriptive, and descriptive with examples. A two-round evaluation system using LLM-as-a-Judge and traditional metrics for text simplification determines optimal model-prompt combinations for final submissions. Results demonstrate that descriptive prompts consistently outperform other strategies across all models, achieving 46-65% of first-place rankings. Qwen3 shows superior performance for A2-level simplification, while B1-level results are more balanced across models. The LLM-as-a-Judge evaluation method shows strong alignment with traditional metrics while providing enhanced explainability.</abstract>
<identifier type="citekey">barbu-etal-2025-easyjon</identifier>
<location>
<url>https://aclanthology.org/2025.tsar-1.14/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>173</start>
<end>182</end>
</extent>
</part>
</mods>
</modsCollection>

%0 Conference Proceedings
%T EasyJon at TSAR 2025 Shared Task Evaluation of Automated Text Simplification with LLM-as-a-Judge
%A Barbu, Paul-Gerhard
%A Lipska-Dieck, Adrianna
%A Lindner, Lena
%Y Shardlow, Matthew
%Y Alva-Manchego, Fernando
%Y North, Kai
%Y Stodden, Regina
%Y Saggion, Horacio
%Y Khallaf, Nouran
%Y Hayakawa, Akio
%S Proceedings of the Fourth Workshop on Text Simplification, Accessibility and Readability (TSAR 2025)
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-176-6
%F barbu-etal-2025-easyjon
%X This paper presents an approach to automated text simplification for CEFR A2 and B1 levels using large language models and prompt engineering. We evaluate seven models across three prompting strategies: short, descriptive, and descriptive with examples. A two-round evaluation system using LLM-as-a-Judge and traditional metrics for text simplification determines optimal model-prompt combinations for final submissions. Results demonstrate that descriptive prompts consistently outperform other strategies across all models, achieving 46-65% of first-place rankings. Qwen3 shows superior performance for A2-level simplification, while B1-level results are more balanced across models. The LLM-as-a-Judge evaluation method shows strong alignment with traditional metrics while providing enhanced explainability.
%U https://aclanthology.org/2025.tsar-1.14/
%P 173-182

Markdown (Informal)
[EasyJon at TSAR 2025 Shared Task Evaluation of Automated Text Simplification with LLM-as-a-Judge](https://aclanthology.org/2025.tsar-1.14/) (Barbu et al., TSAR 2025)
ACL
Paul-Gerhard Barbu, Adrianna Lipska-Dieck, and Lena Lindner. 2025. EasyJon at TSAR 2025 Shared Task Evaluation of Automated Text Simplification with LLM-as-a-Judge. In Proceedings of the Fourth Workshop on Text Simplification, Accessibility and Readability (TSAR 2025), pages 173–182, Suzhou, China. Association for Computational Linguistics.