@inproceedings{malykh-etal-2020-sumtitles,
title = "{S}um{T}itles: a Summarization Dataset with Low Extractiveness",
author = "Malykh, Valentin and
Chernis, Konstantin and
Artemova, Ekaterina and
Piontkovskaya, Irina",
editor = "Scott, Donia and
Bel, Nuria and
Zong, Chengqing",
booktitle = "Proceedings of the 28th International Conference on Computational Linguistics",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2020.coling-main.503",
doi = "10.18653/v1/2020.coling-main.503",
pages = "5718--5730",
abstract = "The existing dialogue summarization corpora are significantly extractive. We introduce a methodology for dataset extractiveness evaluation and present a new low-extractive corpus of movie dialogues for abstractive text summarization along with baseline evaluation. The corpus contains 153k dialogues and consists of three parts: 1) automatically aligned subtitles, 2) automatically aligned scenes from scripts, and 3) manually aligned scenes from scripts. We also present an alignment algorithm which we use to construct the corpus.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="malykh-etal-2020-sumtitles">
<titleInfo>
<title>SumTitles: a Summarization Dataset with Low Extractiveness</title>
</titleInfo>
<name type="personal">
<namePart type="given">Valentin</namePart>
<namePart type="family">Malykh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Konstantin</namePart>
<namePart type="family">Chernis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Artemova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Irina</namePart>
<namePart type="family">Piontkovskaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 28th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Donia</namePart>
<namePart type="family">Scott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nuria</namePart>
<namePart type="family">Bel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chengqing</namePart>
<namePart type="family">Zong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The existing dialogue summarization corpora are significantly extractive. We introduce a methodology for dataset extractiveness evaluation and present a new low-extractive corpus of movie dialogues for abstractive text summarization along with baseline evaluation. The corpus contains 153k dialogues and consists of three parts: 1) automatically aligned subtitles, 2) automatically aligned scenes from scripts, and 3) manually aligned scenes from scripts. We also present an alignment algorithm which we use to construct the corpus.</abstract>
<identifier type="citekey">malykh-etal-2020-sumtitles</identifier>
<identifier type="doi">10.18653/v1/2020.coling-main.503</identifier>
<location>
<url>https://aclanthology.org/2020.coling-main.503</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>5718</start>
<end>5730</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SumTitles: a Summarization Dataset with Low Extractiveness
%A Malykh, Valentin
%A Chernis, Konstantin
%A Artemova, Ekaterina
%A Piontkovskaya, Irina
%Y Scott, Donia
%Y Bel, Nuria
%Y Zong, Chengqing
%S Proceedings of the 28th International Conference on Computational Linguistics
%D 2020
%8 December
%I International Committee on Computational Linguistics
%C Barcelona, Spain (Online)
%F malykh-etal-2020-sumtitles
%X The existing dialogue summarization corpora are significantly extractive. We introduce a methodology for dataset extractiveness evaluation and present a new low-extractive corpus of movie dialogues for abstractive text summarization along with baseline evaluation. The corpus contains 153k dialogues and consists of three parts: 1) automatically aligned subtitles, 2) automatically aligned scenes from scripts, and 3) manually aligned scenes from scripts. We also present an alignment algorithm which we use to construct the corpus.
%R 10.18653/v1/2020.coling-main.503
%U https://aclanthology.org/2020.coling-main.503
%U https://doi.org/10.18653/v1/2020.coling-main.503
%P 5718-5730
Markdown (Informal)
[SumTitles: a Summarization Dataset with Low Extractiveness](https://aclanthology.org/2020.coling-main.503) (Malykh et al., COLING 2020)
ACL
- Valentin Malykh, Konstantin Chernis, Ekaterina Artemova, and Irina Piontkovskaya. 2020. SumTitles: a Summarization Dataset with Low Extractiveness. In Proceedings of the 28th International Conference on Computational Linguistics, pages 5718–5730, Barcelona, Spain (Online). International Committee on Computational Linguistics.