@inproceedings{rennes-2020-simpler,
title = "Is it simpler? An Evaluation of an Aligned Corpus of Standard-Simple Sentences",
author = "Rennes, Evelina",
editor = "Gala, N{\'u}ria and
Wilkens, Rodrigo",
booktitle = "Proceedings of the 1st Workshop on Tools and Resources to Empower People with REAding DIfficulties (READI)",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.readi-1.2",
pages = "6--13",
abstract = "Parallel monolingual resources are imperative for data-driven sentence simplification research. We present the work of aligning, at the sentence level, a corpus of all Swedish public authorities and municipalities web texts in standard and simple Swedish. We compare the performance of three alignment algorithms used for similar work in English (Average Alignment, Maximum Alignment, and Hungarian Alignment), and the best-performing algorithm is used to create a resource of 15,433 unique sentence pairs. We evaluate the resulting corpus using a set of features that has proven to predict text complexity of Swedish texts. The results show that the sentences of the simple sub-corpus are indeed less complex than the sentences of the standard part of the corpus, according to many of the text complexity measures.",
language = "English",
ISBN = "979-10-95546-45-0",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rennes-2020-simpler">
<titleInfo>
<title>Is it simpler? An Evaluation of an Aligned Corpus of Standard-Simple Sentences</title>
</titleInfo>
<name type="personal">
<namePart type="given">Evelina</namePart>
<namePart type="family">Rennes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop on Tools and Resources to Empower People with REAding DIfficulties (READI)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Núria</namePart>
<namePart type="family">Gala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rodrigo</namePart>
<namePart type="family">Wilkens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-45-0</identifier>
</relatedItem>
<abstract>Parallel monolingual resources are imperative for data-driven sentence simplification research. We present the work of aligning, at the sentence level, a corpus of all Swedish public authorities and municipalities web texts in standard and simple Swedish. We compare the performance of three alignment algorithms used for similar work in English (Average Alignment, Maximum Alignment, and Hungarian Alignment), and the best-performing algorithm is used to create a resource of 15,433 unique sentence pairs. We evaluate the resulting corpus using a set of features that has proven to predict text complexity of Swedish texts. The results show that the sentences of the simple sub-corpus are indeed less complex than the sentences of the standard part of the corpus, according to many of the text complexity measures.</abstract>
<identifier type="citekey">rennes-2020-simpler</identifier>
<location>
<url>https://aclanthology.org/2020.readi-1.2</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>6</start>
<end>13</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Is it simpler? An Evaluation of an Aligned Corpus of Standard-Simple Sentences
%A Rennes, Evelina
%Y Gala, Núria
%Y Wilkens, Rodrigo
%S Proceedings of the 1st Workshop on Tools and Resources to Empower People with REAding DIfficulties (READI)
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-45-0
%G English
%F rennes-2020-simpler
%X Parallel monolingual resources are imperative for data-driven sentence simplification research. We present the work of aligning, at the sentence level, a corpus of all Swedish public authorities and municipalities web texts in standard and simple Swedish. We compare the performance of three alignment algorithms used for similar work in English (Average Alignment, Maximum Alignment, and Hungarian Alignment), and the best-performing algorithm is used to create a resource of 15,433 unique sentence pairs. We evaluate the resulting corpus using a set of features that has proven to predict text complexity of Swedish texts. The results show that the sentences of the simple sub-corpus are indeed less complex than the sentences of the standard part of the corpus, according to many of the text complexity measures.
%U https://aclanthology.org/2020.readi-1.2
%P 6-13
Markdown (Informal)
[Is it simpler? An Evaluation of an Aligned Corpus of Standard-Simple Sentences](https://aclanthology.org/2020.readi-1.2) (Rennes, READI 2020)
ACL