% Conference paper from the CLIB 2022 proceedings; the url field points to its ACL Anthology page.
@inproceedings{callegari-xhura-2022-corpus,
  author    = {Callegari, Elena and Xhura, Desara},
  title     = {A corpus for Automatic Article Analysis},
  booktitle = {Proceedings of the 5th International Conference on Computational Linguistics in Bulgaria (CLIB 2022)},
  year      = {2022},
  month     = sep,
  address   = {Sofia, Bulgaria},
  publisher = {Department of Computational Linguistics, IBL -- BAS},
  pages     = {13--21},
  url       = {https://aclanthology.org/2022.clib-1.2},
  abstract  = {We describe the structure and creation of the SageWrite corpus. This is a manually annotated corpus created to support automatic language generation and automatic quality assessment of academic articles. The corpus currently contains annotations for 100 excerpts taken from various scientific articles. For each of these excerpts, the corpus contains (i) a draft version of the excerpt (ii) annotations that reflect the stylistic and linguistics merits of the excerpt, such as whether or not the text is clearly structured. The SageWrite corpus is the first corpus for the fine-tuning of text-generation algorithms that specifically addresses academic writing.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="callegari-xhura-2022-corpus">
    <titleInfo>
      <title>A corpus for Automatic Article Analysis</title>
    </titleInfo>
    <!-- Authors, one name element each, in the order they are listed. -->
    <name type="personal">
      <namePart type="given">Elena</namePart>
      <namePart type="family">Callegari</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Desara</namePart>
      <namePart type="family">Xhura</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 5th International Conference on Computational Linguistics in Bulgaria (CLIB 2022)</title>
      </titleInfo>
      <originInfo>
        <publisher>Department of Computational Linguistics, IBL – BAS</publisher>
        <place>
          <placeTerm type="text">Sofia, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We describe the structure and creation of the SageWrite corpus. This is a manually annotated corpus created to support automatic language generation and automatic quality assessment of academic articles. The corpus currently contains annotations for 100 excerpts taken from various scientific articles. For each of these excerpts, the corpus contains (i) a draft version of the excerpt (ii) annotations that reflect the stylistic and linguistics merits of the excerpt, such as whether or not the text is clearly structured. The SageWrite corpus is the first corpus for the fine-tuning of text-generation algorithms that specifically addresses academic writing.</abstract>
    <identifier type="citekey">callegari-xhura-2022-corpus</identifier>
    <location>
      <url>https://aclanthology.org/2022.clib-1.2</url>
    </location>
    <!-- Issue date and page extent of the paper. -->
    <part>
      <date>2022-09</date>
      <extent unit="page">
        <start>13</start>
        <end>21</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A corpus for Automatic Article Analysis
%A Callegari, Elena
%A Xhura, Desara
%S Proceedings of the 5th International Conference on Computational Linguistics in Bulgaria (CLIB 2022)
%D 2022
%8 September
%I Department of Computational Linguistics, IBL – BAS
%C Sofia, Bulgaria
%F callegari-xhura-2022-corpus
%X We describe the structure and creation of the SageWrite corpus. This is a manually annotated corpus created to support automatic language generation and automatic quality assessment of academic articles. The corpus currently contains annotations for 100 excerpts taken from various scientific articles. For each of these excerpts, the corpus contains (i) a draft version of the excerpt (ii) annotations that reflect the stylistic and linguistics merits of the excerpt, such as whether or not the text is clearly structured. The SageWrite corpus is the first corpus for the fine-tuning of text-generation algorithms that specifically addresses academic writing.
%U https://aclanthology.org/2022.clib-1.2
%P 13-21
Markdown (Informal)
[A corpus for Automatic Article Analysis](https://aclanthology.org/2022.clib-1.2) (Callegari & Xhura, CLIB 2022)
ACL
- Elena Callegari and Desara Xhura. 2022. A corpus for Automatic Article Analysis. In Proceedings of the 5th International Conference on Computational Linguistics in Bulgaria (CLIB 2022), pages 13–21, Sofia, Bulgaria. Department of Computational Linguistics, IBL – BAS.