@inproceedings{broda-etal-2014-measuring,
title = "Measuring Readability of {P}olish Texts: Baseline Experiments",
author = "Broda, Bartosz and
Nito{\'n}, Bart{\l}omiej and
Gruszczy{\'n}ski, W{\l}odzimierz and
Ogrodniczuk, Maciej",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Loftsson, Hrafn and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
month = may,
year = "2014",
address = "Reykjavik, Iceland",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/427_Paper.pdf",
pages = "573--580",
abstract = "Measuring readability of a text is the first sensible step to its simplification. In this paper we present an overview of the most common approaches to automatic measuring of readability. Of the described ones, we implemented and evaluated: Gunning FOG index, Flesch-based Pisarek method. We also present two other approaches. The first one is based on measuring distributional lexical similarity of a target text and comparing it to reference texts. In the second one, we propose a novel method for automation of Taylor test ― which, in its base form, requires performing a large amount of surveys. The automation of Taylor test is performed using a technique called statistical language modelling. We have developed a free on-line web-based system and constructed plugins for the most common text editors, namely Microsoft Word and OpenOffice.org. Inner workings of the system are described in detail. Finally, extensive evaluations are performed for Polish ― a Slavic, highly inflected language. We show that Pisareks method is highly correlated to Gunning FOG Index, even if different in form, and that both the similarity-based approach and automated Taylor test achieve high accuracy. Merits of using either of them are discussed.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="broda-etal-2014-measuring">
<titleInfo>
<title>Measuring Readability of Polish Texts: Baseline Experiments</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bartosz</namePart>
<namePart type="family">Broda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bartłomiej</namePart>
<namePart type="family">Nitoń</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Włodzimierz</namePart>
<namePart type="family">Gruszczyński</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maciej</namePart>
<namePart type="family">Ogrodniczuk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Measuring readability of a text is the first sensible step to its simplification. In this paper we present an overview of the most common approaches to automatic measuring of readability. Of the described ones, we implemented and evaluated: Gunning FOG index, Flesch-based Pisarek method. We also present two other approaches. The first one is based on measuring distributional lexical similarity of a target text and comparing it to reference texts. In the second one, we propose a novel method for automation of Taylor test ― which, in its base form, requires performing a large amount of surveys. The automation of Taylor test is performed using a technique called statistical language modelling. We have developed a free on-line web-based system and constructed plugins for the most common text editors, namely Microsoft Word and OpenOffice.org. Inner workings of the system are described in detail. Finally, extensive evaluations are performed for Polish ― a Slavic, highly inflected language. We show that Pisareks method is highly correlated to Gunning FOG Index, even if different in form, and that both the similarity-based approach and automated Taylor test achieve high accuracy. Merits of using either of them are discussed.</abstract>
<identifier type="citekey">broda-etal-2014-measuring</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/427_Paper.pdf</url>
</location>
<part>
<date>2014-05</date>
<extent unit="page">
<start>573</start>
<end>580</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Measuring Readability of Polish Texts: Baseline Experiments
%A Broda, Bartosz
%A Nitoń, Bartłomiej
%A Gruszczyński, Włodzimierz
%A Ogrodniczuk, Maciej
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Loftsson, Hrafn
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 May
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F broda-etal-2014-measuring
%X Measuring readability of a text is the first sensible step to its simplification. In this paper we present an overview of the most common approaches to automatic measuring of readability. Of the described ones, we implemented and evaluated: Gunning FOG index, Flesch-based Pisarek method. We also present two other approaches. The first one is based on measuring distributional lexical similarity of a target text and comparing it to reference texts. In the second one, we propose a novel method for automation of Taylor test ― which, in its base form, requires performing a large amount of surveys. The automation of Taylor test is performed using a technique called statistical language modelling. We have developed a free on-line web-based system and constructed plugins for the most common text editors, namely Microsoft Word and OpenOffice.org. Inner workings of the system are described in detail. Finally, extensive evaluations are performed for Polish ― a Slavic, highly inflected language. We show that Pisareks method is highly correlated to Gunning FOG Index, even if different in form, and that both the similarity-based approach and automated Taylor test achieve high accuracy. Merits of using either of them are discussed.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/427_Paper.pdf
%P 573-580
Markdown (Informal)
[Measuring Readability of Polish Texts: Baseline Experiments](http://www.lrec-conf.org/proceedings/lrec2014/pdf/427_Paper.pdf) (Broda et al., LREC 2014)
ACL
- Bartosz Broda, Bartłomiej Nitoń, Włodzimierz Gruszczyński, and Maciej Ogrodniczuk. 2014. Measuring Readability of Polish Texts: Baseline Experiments. In Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14), pages 573–580, Reykjavik, Iceland. European Language Resources Association (ELRA).