@inproceedings{leal-etal-2018-nontrivial,
title = "A Nontrivial Sentence Corpus for the Task of Sentence Readability Assessment in {P}ortuguese",
author = "Leal, Sidney Evaldo and
Duran, Magali Sanches and
Alu{\'i}sio, Sandra Maria",
editor = "Bender, Emily M. and
Derczynski, Leon and
Isabelle, Pierre",
booktitle = "Proceedings of the 27th International Conference on Computational Linguistics",
month = aug,
year = "2018",
address = "Santa Fe, New Mexico, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/C18-1034/",
pages = "401--413",
abstract = "Effective textual communication depends on readers being proficient enough to comprehend texts, and texts being clear enough to be understood by the intended audience, in a reading task. When the meaning of textual information and instructions is not well conveyed, many losses and damages may occur. Among the solutions to alleviate this problem is the automatic evaluation of sentence readability, task which has been receiving a lot of attention due to its large applicability. However, a shortage of resources, such as corpora for training and evaluation, hinders the full development of this task. In this paper, we generate a nontrivial sentence corpus in Portuguese. We evaluate three scenarios for building it, taking advantage of a parallel corpus of simplification, in which each sentence triplet is aligned and has simplification operations annotated, being ideal for justifying possible mistakes of future methods. The best scenario of our corpus PorSimplesSent is composed of 4,888 pairs, which is bigger than a similar corpus for English; all the three versions of it are publicly available. We created four baselines for PorSimplesSent and made available a pairwise ranking method, using 17 linguistic and psycholinguistic features, which correctly identifies the ranking of sentence pairs with an accuracy of 74.2{\%}."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="leal-etal-2018-nontrivial">
<titleInfo>
<title>A Nontrivial Sentence Corpus for the Task of Sentence Readability Assessment in Portuguese</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sidney</namePart>
<namePart type="given">Evaldo</namePart>
<namePart type="family">Leal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Magali</namePart>
<namePart type="given">Sanches</namePart>
<namePart type="family">Duran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sandra</namePart>
<namePart type="given">Maria</namePart>
<namePart type="family">Aluísio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 27th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Bender</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leon</namePart>
<namePart type="family">Derczynski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pierre</namePart>
<namePart type="family">Isabelle</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Santa Fe, New Mexico, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Effective textual communication depends on readers being proficient enough to comprehend texts, and texts being clear enough to be understood by the intended audience, in a reading task. When the meaning of textual information and instructions is not well conveyed, many losses and damages may occur. Among the solutions to alleviate this problem is the automatic evaluation of sentence readability, task which has been receiving a lot of attention due to its large applicability. However, a shortage of resources, such as corpora for training and evaluation, hinders the full development of this task. In this paper, we generate a nontrivial sentence corpus in Portuguese. We evaluate three scenarios for building it, taking advantage of a parallel corpus of simplification, in which each sentence triplet is aligned and has simplification operations annotated, being ideal for justifying possible mistakes of future methods. The best scenario of our corpus PorSimplesSent is composed of 4,888 pairs, which is bigger than a similar corpus for English; all the three versions of it are publicly available. We created four baselines for PorSimplesSent and made available a pairwise ranking method, using 17 linguistic and psycholinguistic features, which correctly identifies the ranking of sentence pairs with an accuracy of 74.2%.</abstract>
<identifier type="citekey">leal-etal-2018-nontrivial</identifier>
<location>
<url>https://aclanthology.org/C18-1034/</url>
</location>
<part>
<date>2018-08</date>
<extent unit="page">
<start>401</start>
<end>413</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Nontrivial Sentence Corpus for the Task of Sentence Readability Assessment in Portuguese
%A Leal, Sidney Evaldo
%A Duran, Magali Sanches
%A Aluísio, Sandra Maria
%Y Bender, Emily M.
%Y Derczynski, Leon
%Y Isabelle, Pierre
%S Proceedings of the 27th International Conference on Computational Linguistics
%D 2018
%8 August
%I Association for Computational Linguistics
%C Santa Fe, New Mexico, USA
%F leal-etal-2018-nontrivial
%X Effective textual communication depends on readers being proficient enough to comprehend texts, and texts being clear enough to be understood by the intended audience, in a reading task. When the meaning of textual information and instructions is not well conveyed, many losses and damages may occur. Among the solutions to alleviate this problem is the automatic evaluation of sentence readability, task which has been receiving a lot of attention due to its large applicability. However, a shortage of resources, such as corpora for training and evaluation, hinders the full development of this task. In this paper, we generate a nontrivial sentence corpus in Portuguese. We evaluate three scenarios for building it, taking advantage of a parallel corpus of simplification, in which each sentence triplet is aligned and has simplification operations annotated, being ideal for justifying possible mistakes of future methods. The best scenario of our corpus PorSimplesSent is composed of 4,888 pairs, which is bigger than a similar corpus for English; all the three versions of it are publicly available. We created four baselines for PorSimplesSent and made available a pairwise ranking method, using 17 linguistic and psycholinguistic features, which correctly identifies the ranking of sentence pairs with an accuracy of 74.2%.
%U https://aclanthology.org/C18-1034/
%P 401-413
Markdown (Informal)
[A Nontrivial Sentence Corpus for the Task of Sentence Readability Assessment in Portuguese](https://aclanthology.org/C18-1034/) (Leal et al., COLING 2018)
ACL