@inproceedings{hatzel-biemann-2024-tell-large,
title = "Tell Me Again! a Large-Scale Dataset of Multiple Summaries for the Same Story",
author = "Hatzel, Hans Ole and
Biemann, Chris",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.1366",
pages = "15732--15741",
abstract = "A wide body of research is concerned with the semantics of narratives, both in terms of understanding narratives and generating fictional narratives and stories. We provide a dataset of summaries to be used as a proxy for entire stories or for the analysis of the summaries themselves. Our dataset consists of a total of 96,831 individual summaries across 29,505 stories. We intend for the dataset to be used for training and evaluation of embedding representations for stories, specifically the stories{'} narratives. The summary data is harvested from five different language versions of Wikipedia. Our dataset comes with rich metadata, which we extract from Wikidata, enabling a wide range of applications that operate on story summaries in conjunction with metadata. To set baseline results, we run retrieval experiments on the dataset, exploring the capability of similarity models in retrieving summaries of the same story. For this retrieval, a crucial element is to not place too much emphasis on the named entities, as this can enable retrieval of other summaries for the same work without taking the narrative into account.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hatzel-biemann-2024-tell-large">
<titleInfo>
<title>Tell Me Again! a Large-Scale Dataset of Multiple Summaries for the Same Story</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hans</namePart>
<namePart type="given">Ole</namePart>
<namePart type="family">Hatzel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Biemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A wide body of research is concerned with the semantics of narratives, both in terms of understanding narratives and generating fictional narratives and stories. We provide a dataset of summaries to be used as a proxy for entire stories or for the analysis of the summaries themselves. Our dataset consists of a total of 96,831 individual summaries across 29,505 stories. We intend for the dataset to be used for training and evaluation of embedding representations for stories, specifically the stories’ narratives. The summary data is harvested from five different language versions of Wikipedia. Our dataset comes with rich metadata, which we extract from Wikidata, enabling a wide range of applications that operate on story summaries in conjunction with metadata. To set baseline results, we run retrieval experiments on the dataset, exploring the capability of similarity models in retrieving summaries of the same story. For this retrieval, a crucial element is to not place too much emphasis on the named entities, as this can enable retrieval of other summaries for the same work without taking the narrative into account.</abstract>
<identifier type="citekey">hatzel-biemann-2024-tell-large</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.1366</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>15732</start>
<end>15741</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Tell Me Again! a Large-Scale Dataset of Multiple Summaries for the Same Story
%A Hatzel, Hans Ole
%A Biemann, Chris
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F hatzel-biemann-2024-tell-large
%X A wide body of research is concerned with the semantics of narratives, both in terms of understanding narratives and generating fictional narratives and stories. We provide a dataset of summaries to be used as a proxy for entire stories or for the analysis of the summaries themselves. Our dataset consists of a total of 96,831 individual summaries across 29,505 stories. We intend for the dataset to be used for training and evaluation of embedding representations for stories, specifically the stories’ narratives. The summary data is harvested from five different language versions of Wikipedia. Our dataset comes with rich metadata, which we extract from Wikidata, enabling a wide range of applications that operate on story summaries in conjunction with metadata. To set baseline results, we run retrieval experiments on the dataset, exploring the capability of similarity models in retrieving summaries of the same story. For this retrieval, a crucial element is to not place too much emphasis on the named entities, as this can enable retrieval of other summaries for the same work without taking the narrative into account.
%U https://aclanthology.org/2024.lrec-main.1366
%P 15732-15741
Markdown (Informal)
[Tell Me Again! a Large-Scale Dataset of Multiple Summaries for the Same Story](https://aclanthology.org/2024.lrec-main.1366) (Hatzel & Biemann, LREC-COLING 2024)
ACL