% Workshop paper published in the AI & EL/PL 2025 proceedings.
% "Leichte Sprache" is brace-protected in the title so that styles which
% sentence-case titles keep its capitalisation.
% NOTE(review): the first editor is stored with family name "Ginel" and given
% names "Maria Isabel Rivas"; this may really be a compound surname
% ("Rivas Ginel"). Confirm against the proceedings front matter before changing.
@inproceedings{patil-etal-2025-quantifying,
  title     = {Quantifying word complexity for {Leichte Sprache}: A computational metric and its psycholinguistic validation},
  author    = {Patil, Umesh and
               Calvillo, Jesus and
               Lago, Sol and
               Schumann, Anne-Kathrin},
  editor    = {Ginel, Mar{\'i}a Isabel Rivas and
               Cadwell, Patrick and
               Canavese, Paolo and
               Hansen-Schirra, Silvia and
               Kappus, Martin and
               Matamala, Anna and
               Noonan, Will},
  booktitle = {Proceedings of the 1st Workshop on Artificial Intelligence and Easy and Plain Language in Institutional Contexts (AI {\&} EL/PL)},
  month     = jun,
  year      = {2025},
  address   = {Geneva, Switzerland},
  publisher = {European Association for Machine Translation},
  url       = {https://aclanthology.org/2025.aielpl-1.9/},
  pages     = {94--107},
  isbn      = {978-2-9701897-5-6},
  abstract  = {Leichte Sprache (Easy Language or Easy German) is a strongly simplified version of German geared toward a target group with limited language proficiency. In Germany, public bodies are required to provide information in Leichte Sprache. Unfortunately, Leichte Sprache rules are traditionally defined by non-linguists, they are not rooted in linguistic research and they do not provide precise decision criteria or devices for measuring the complexity of linguistic structures (Bock and Pappert, 2023). For instance, one of the rules simply recommends the usage of simple rather than complex words. In this paper we, therefore, propose a model to determine word complexity. We train an XGBoost model for classifying word complexity by leveraging word-level linguistic and corpus-level distributional features, frequency information from an in-house Leichte Sprache corpus and human complexity annotations. We psycholinguistically validate our model by showing that it captures human word recognition times above and beyond traditional word-level predictors. Moreover, we discuss a number of practical applications of our classifier, such as the evaluation of AI-simplified text and detection of CEFR levels of words. To our knowledge, this is one of the first attempts to systematically quantify word complexity in the context of Leichte Sprache and to link it directly to real-time word processing.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the paper with citekey "patil-etal-2025-quantifying".
     The top-level name elements are the paper's authors; the host relatedItem
     describes the proceedings volume (title, editors, publisher, place, ISBN). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="patil-etal-2025-quantifying">
    <titleInfo>
      <title>Quantifying word complexity for Leichte Sprache: A computational metric and its psycholinguistic validation</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Umesh</namePart>
      <namePart type="family">Patil</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jesus</namePart>
      <namePart type="family">Calvillo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sol</namePart>
      <namePart type="family">Lago</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Anne-Kathrin</namePart>
      <namePart type="family">Schumann</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <!-- The ampersand is written as an entity reference; a bare "&" in
             character data makes the document not well-formed. -->
        <title>Proceedings of the 1st Workshop on Artificial Intelligence and Easy and Plain Language in Institutional Contexts (AI &amp; EL/PL)</title>
      </titleInfo>
      <!-- Editors of the proceedings volume. -->
      <name type="personal">
        <namePart type="given">María</namePart>
        <namePart type="given">Isabel</namePart>
        <namePart type="given">Rivas</namePart>
        <namePart type="family">Ginel</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Patrick</namePart>
        <namePart type="family">Cadwell</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Paolo</namePart>
        <namePart type="family">Canavese</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Silvia</namePart>
        <namePart type="family">Hansen-Schirra</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Martin</namePart>
        <namePart type="family">Kappus</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Matamala</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Will</namePart>
        <namePart type="family">Noonan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Association for Machine Translation</publisher>
        <place>
          <placeTerm type="text">Geneva, Switzerland</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">978-2-9701897-5-6</identifier>
    </relatedItem>
    <abstract>Leichte Sprache (Easy Language or Easy German) is a strongly simplified version of German geared toward a target group with limited language proficiency. In Germany, public bodies are required to provide information in Leichte Sprache. Unfortunately, Leichte Sprache rules are traditionally defined by non-linguists, they are not rooted in linguistic research and they do not provide precise decision criteria or devices for measuring the complexity of linguistic structures (Bock and Pappert, 2023). For instance, one of the rules simply recommends the usage of simple rather than complex words. In this paper we, therefore, propose a model to determine word complexity. We train an XGBoost model for classifying word complexity by leveraging word-level linguistic and corpus-level distributional features, frequency information from an in-house Leichte Sprache corpus and human complexity annotations. We psycholinguistically validate our model by showing that it captures human word recognition times above and beyond traditional word-level predictors. Moreover, we discuss a number of practical applications of our classifier, such as the evaluation of AI-simplified text and detection of CEFR levels of words. To our knowledge, this is one of the first attempts to systematically quantify word complexity in the context of Leichte Sprache and to link it directly to real-time word processing.</abstract>
    <identifier type="citekey">patil-etal-2025-quantifying</identifier>
    <location>
      <url>https://aclanthology.org/2025.aielpl-1.9/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2025-06</date>
      <extent unit="page">
        <start>94</start>
        <end>107</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Quantifying word complexity for Leichte Sprache: A computational metric and its psycholinguistic validation
%A Patil, Umesh
%A Calvillo, Jesus
%A Lago, Sol
%A Schumann, Anne-Kathrin
%Y Ginel, María Isabel Rivas
%Y Cadwell, Patrick
%Y Canavese, Paolo
%Y Hansen-Schirra, Silvia
%Y Kappus, Martin
%Y Matamala, Anna
%Y Noonan, Will
%S Proceedings of the 1st Workshop on Artificial Intelligence and Easy and Plain Language in Institutional Contexts (AI & EL/PL)
%D 2025
%8 June
%I European Association for Machine Translation
%C Geneva, Switzerland
%@ 978-2-9701897-5-6
%F patil-etal-2025-quantifying
%X Leichte Sprache (Easy Language or Easy German) is a strongly simplified version of German geared toward a target group with limited language proficiency. In Germany, public bodies are required to provide information in Leichte Sprache. Unfortunately, Leichte Sprache rules are traditionally defined by non-linguists, they are not rooted in linguistic research and they do not provide precise decision criteria or devices for measuring the complexity of linguistic structures (Bock and Pappert, 2023). For instance, one of the rules simply recommends the usage of simple rather than complex words. In this paper we, therefore, propose a model to determine word complexity. We train an XGBoost model for classifying word complexity by leveraging word-level linguistic and corpus-level distributional features, frequency information from an in-house Leichte Sprache corpus and human complexity annotations. We psycholinguistically validate our model by showing that it captures human word recognition times above and beyond traditional word-level predictors. Moreover, we discuss a number of practical applications of our classifier, such as the evaluation of AI-simplified text and detection of CEFR levels of words. To our knowledge, this is one of the first attempts to systematically quantify word complexity in the context of Leichte Sprache and to link it directly to real-time word processing.
%U https://aclanthology.org/2025.aielpl-1.9/
%P 94-107
Markdown (Informal)
[Quantifying word complexity for Leichte Sprache: A computational metric and its psycholinguistic validation](https://aclanthology.org/2025.aielpl-1.9/) (Patil et al., AIELPL 2025)
ACL