@inproceedings{trott-riviere-2024-measuring,
title = "Measuring and Modifying the Readability of {E}nglish Texts with {GPT}-4",
author = "Trott, Sean and
Rivi{\`e}re, Pamela",
editor = "Shardlow, Matthew and
Saggion, Horacio and
Alva-Manchego, Fernando and
Zampieri, Marcos and
North, Kai and
{\v{S}}tajner, Sanja and
Stodden, Regina",
booktitle = "Proceedings of the Third Workshop on Text Simplification, Accessibility and Readability (TSAR 2024)",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.tsar-1.13",
pages = "126--134",
abstract = "The success of Large Language Models (LLMs) in other domains has raised the question of whether LLMs can reliably assess and manipulate the readability of text. We approach this question empirically. First, using a published corpus of 4,724 English text excerpts, we find that readability estimates produced {``}zero-shot{''} from GPT-4 Turbo and GPT-4o mini exhibit relatively high correlation with human judgments (r = 0.76 and r = 0.74, respectively), out-performing estimates derived from traditional readability formulas and various psycholinguistic indices. Then, in a pre-registered human experiment (N = 59), we ask whether Turbo can reliably make text easier or harder to read. We find evidence to support this hypothesis, though considerable variance in human judgments remains unexplained. We conclude by discussing the limitations of this approach, including limited scope, as well as the validity of the {``}readability{''} construct and its dependence on context, audience, and goal.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="trott-riviere-2024-measuring">
<titleInfo>
<title>Measuring and Modifying the Readability of English Texts with GPT-4</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sean</namePart>
<namePart type="family">Trott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pamela</namePart>
<namePart type="family">Rivière</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Text Simplification, Accessibility and Readability (TSAR 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Shardlow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Horacio</namePart>
<namePart type="family">Saggion</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fernando</namePart>
<namePart type="family">Alva-Manchego</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai</namePart>
<namePart type="family">North</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sanja</namePart>
<namePart type="family">Štajner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Regina</namePart>
<namePart type="family">Stodden</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The success of Large Language Models (LLMs) in other domains has raised the question of whether LLMs can reliably assess and manipulate the readability of text. We approach this question empirically. First, using a published corpus of 4,724 English text excerpts, we find that readability estimates produced “zero-shot” from GPT-4 Turbo and GPT-4o mini exhibit relatively high correlation with human judgments (r = 0.76 and r = 0.74, respectively), out-performing estimates derived from traditional readability formulas and various psycholinguistic indices. Then, in a pre-registered human experiment (N = 59), we ask whether Turbo can reliably make text easier or harder to read. We find evidence to support this hypothesis, though considerable variance in human judgments remains unexplained. We conclude by discussing the limitations of this approach, including limited scope, as well as the validity of the “readability” construct and its dependence on context, audience, and goal.</abstract>
<identifier type="citekey">trott-riviere-2024-measuring</identifier>
<location>
<url>https://aclanthology.org/2024.tsar-1.13</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>126</start>
<end>134</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Measuring and Modifying the Readability of English Texts with GPT-4
%A Trott, Sean
%A Rivière, Pamela
%Y Shardlow, Matthew
%Y Saggion, Horacio
%Y Alva-Manchego, Fernando
%Y Zampieri, Marcos
%Y North, Kai
%Y Štajner, Sanja
%Y Stodden, Regina
%S Proceedings of the Third Workshop on Text Simplification, Accessibility and Readability (TSAR 2024)
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F trott-riviere-2024-measuring
%X The success of Large Language Models (LLMs) in other domains has raised the question of whether LLMs can reliably assess and manipulate the readability of text. We approach this question empirically. First, using a published corpus of 4,724 English text excerpts, we find that readability estimates produced “zero-shot” from GPT-4 Turbo and GPT-4o mini exhibit relatively high correlation with human judgments (r = 0.76 and r = 0.74, respectively), out-performing estimates derived from traditional readability formulas and various psycholinguistic indices. Then, in a pre-registered human experiment (N = 59), we ask whether Turbo can reliably make text easier or harder to read. We find evidence to support this hypothesis, though considerable variance in human judgments remains unexplained. We conclude by discussing the limitations of this approach, including limited scope, as well as the validity of the “readability” construct and its dependence on context, audience, and goal.
%U https://aclanthology.org/2024.tsar-1.13
%P 126-134
Markdown (Informal)
[Measuring and Modifying the Readability of English Texts with GPT-4](https://aclanthology.org/2024.tsar-1.13) (Trott & Rivière, TSAR 2024)
ACL