@article{louis-nenkova-2013-corpus,
title = "A corpus of science journalism for analyzing writing quality",
author = "Louis, Annie and
Nenkova, Ani",
editor = "Fern{\'a}ndez, Raquel and
Dipper, Stefanie and
Zinsmeister, Heike and
Webber, Bonnie",
journal = "Dialogue {\&} Discourse",
volume = "4",
month = may,
year = "2013",
address = "Bielefeld, Germany",
publisher = "University of Bielefeld",
url = "https://aclanthology.org/2013.dnd-4.8/",
doi = "10.5087/dad.2013.205",
pages = "87--117",
abstract = {We introduce a corpus of science journalism articles, categorized in three levels of writing quality. The corpus fulfills a glaring need for realistic data on which applications concerned with predicting text quality can be developed and evaluated. In this article we describe how we identified, guided by the judgements of renowned writers, samples of extraordinarily well-written pieces and how these were expanded to a larger set of typical journalistic writing. We provide details about the corpus and the text quality evaluations it can support. Our intention is to further extend the corpus with annotations of phenomena that reveal quantifiable differences between levels of writing quality. Here we introduce two of the many types of annotation on the sentence level that distinguish amazing from typical writing: text generality/specificity and communicative goal. We explore the feasibility of acquiring annotations automatically, and verify that such features are indeed predictive of writing quality. We find that the annotation of general/specific on sentence level can be performed reasonably accurately fully automatically, while automatic annotations of communicative goal reveals salient characteristics of journalistic writing but does not align with categories we wish to annotate in future work.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="louis-nenkova-2013-corpus">
<titleInfo>
<title>A corpus of science journalism for analyzing writing quality</title>
</titleInfo>
<name type="personal">
<namePart type="given">Annie</namePart>
<namePart type="family">Louis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ani</namePart>
<namePart type="family">Nenkova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2013-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Dialogue &amp; Discourse</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>University of Bielefeld</publisher>
<place>
<placeTerm type="text">Bielefeld, Germany</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>We introduce a corpus of science journalism articles, categorized in three levels of writing quality. The corpus fulfills a glaring need for realistic data on which applications concerned with predicting text quality can be developed and evaluated. In this article we describe how we identified, guided by the judgements of renowned writers, samples of extraordinarily well-written pieces and how these were expanded to a larger set of typical journalistic writing. We provide details about the corpus and the text quality evaluations it can support. Our intention is to further extend the corpus with annotations of phenomena that reveal quantifiable differences between levels of writing quality. Here we introduce two of the many types of annotation on the sentence level that distinguish amazing from typical writing: text generality/specificity and communicative goal. We explore the feasibility of acquiring annotations automatically, and verify that such features are indeed predictive of writing quality. We find that the annotation of general/specific on sentence level can be performed reasonably accurately fully automatically, while automatic annotations of communicative goal reveals salient characteristics of journalistic writing but does not align with categories we wish to annotate in future work.</abstract>
<identifier type="citekey">louis-nenkova-2013-corpus</identifier>
<identifier type="doi">10.5087/dad.2013.205</identifier>
<location>
<url>https://aclanthology.org/2013.dnd-4.8/</url>
</location>
<part>
<date>2013-05</date>
<detail type="volume"><number>4</number></detail>
<extent unit="page">
<start>87</start>
<end>117</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T A corpus of science journalism for analyzing writing quality
%A Louis, Annie
%A Nenkova, Ani
%J Dialogue & Discourse
%D 2013
%8 May
%V 4
%I University of Bielefeld
%C Bielefeld, Germany
%F louis-nenkova-2013-corpus
%X We introduce a corpus of science journalism articles, categorized in three levels of writing quality. The corpus fulfills a glaring need for realistic data on which applications concerned with predicting text quality can be developed and evaluated. In this article we describe how we identified, guided by the judgements of renowned writers, samples of extraordinarily well-written pieces and how these were expanded to a larger set of typical journalistic writing. We provide details about the corpus and the text quality evaluations it can support. Our intention is to further extend the corpus with annotations of phenomena that reveal quantifiable differences between levels of writing quality. Here we introduce two of the many types of annotation on the sentence level that distinguish amazing from typical writing: text generality/specificity and communicative goal. We explore the feasibility of acquiring annotations automatically, and verify that such features are indeed predictive of writing quality. We find that the annotation of general/specific on sentence level can be performed reasonably accurately fully automatically, while automatic annotations of communicative goal reveals salient characteristics of journalistic writing but does not align with categories we wish to annotate in future work.
%R 10.5087/dad.2013.205
%U https://aclanthology.org/2013.dnd-4.8/
%U https://doi.org/10.5087/dad.2013.205
%P 87-117
Markdown (Informal)
[A corpus of science journalism for analyzing writing quality](https://aclanthology.org/2013.dnd-4.8/) (Louis & Nenkova, DND 2013)
ACL