@inproceedings{troiano-vossen-2024-clause,
title = "{CLAUSE}-{ATLAS}: A Corpus of Narrative Information to Scale up Computational Literary Analysis",
author = "Troiano, Enrica and
Vossen, Piek T.J.M.",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.292/",
pages = "3283--3296",
abstract = "We introduce CLAUSE-ATLAS, a resource of XIX and XX century English novels annotated automatically. This corpus, which contains 41,715 labeled clauses, allows to study stories as sequences of eventive, subjective and contextual information. We use it to investigate if recent large language models, in particular gpt-3.5-turbo with 16k tokens of context, constitute promising tools to annotate large amounts of data for literary studies (we show that this is the case). Moreover, by analyzing the annotations so collected, we find that our clause-based approach to literature captures structural patterns within books, as well as qualitative differences between them."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="troiano-vossen-2024-clause">
<titleInfo>
<title>CLAUSE-ATLAS: A Corpus of Narrative Information to Scale up Computational Literary Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Enrica</namePart>
<namePart type="family">Troiano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Piek</namePart>
<namePart type="given">T.J.M.</namePart>
<namePart type="family">Vossen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We introduce CLAUSE-ATLAS, a resource of XIX and XX century English novels annotated automatically. This corpus, which contains 41,715 labeled clauses, allows to study stories as sequences of eventive, subjective and contextual information. We use it to investigate if recent large language models, in particular gpt-3.5-turbo with 16k tokens of context, constitute promising tools to annotate large amounts of data for literary studies (we show that this is the case). Moreover, by analyzing the annotations so collected, we find that our clause-based approach to literature captures structural patterns within books, as well as qualitative differences between them.</abstract>
<identifier type="citekey">troiano-vossen-2024-clause</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.292/</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>3283</start>
<end>3296</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CLAUSE-ATLAS: A Corpus of Narrative Information to Scale up Computational Literary Analysis
%A Troiano, Enrica
%A Vossen, Piek T.J.M.
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F troiano-vossen-2024-clause
%X We introduce CLAUSE-ATLAS, a resource of XIX and XX century English novels annotated automatically. This corpus, which contains 41,715 labeled clauses, allows to study stories as sequences of eventive, subjective and contextual information. We use it to investigate if recent large language models, in particular gpt-3.5-turbo with 16k tokens of context, constitute promising tools to annotate large amounts of data for literary studies (we show that this is the case). Moreover, by analyzing the annotations so collected, we find that our clause-based approach to literature captures structural patterns within books, as well as qualitative differences between them.
%U https://aclanthology.org/2024.lrec-main.292/
%P 3283-3296
Markdown (Informal)
[CLAUSE-ATLAS: A Corpus of Narrative Information to Scale up Computational Literary Analysis](https://aclanthology.org/2024.lrec-main.292/) (Troiano & Vossen, LREC-COLING 2024)
ACL