@inproceedings{ondrejova-suppa-2024-slovaksum,
title = "{S}lovak{S}um: A Large Scale {S}lovak Summarization Dataset",
author = "Ondrejova, Viktoria and
Suppa, Marek",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.1298",
pages = "14916--14922",
abstract = "The ability to automatically summarize news articles has become increasingly important due to the vast amount of information available online. Together with the rise of chatbots , Natural Language Processing (NLP) has recently experienced a tremendous amount of development. Despite these advancements, the majority of research is focused on established well-resourced languages, such as English. To contribute to development of the low resource Slovak language, we introduce SlovakSum, a Slovak news summarization dataset consisting of over 200 thousand news articles with titles and short abstracts obtained from multiple Slovak newspapers. The abstractive approach, including MBART and mT5 models, was used to evaluate various baselines. The code for the reproduction of our dataset and experiments can be found at https://github.com/NaiveNeuron/slovaksum",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ondrejova-suppa-2024-slovaksum">
<titleInfo>
<title>SlovakSum: A Large Scale Slovak Summarization Dataset</title>
</titleInfo>
<name type="personal">
<namePart type="given">Viktoria</namePart>
<namePart type="family">Ondrejova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marek</namePart>
<namePart type="family">Suppa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The ability to automatically summarize news articles has become increasingly important due to the vast amount of information available online. Together with the rise of chatbots , Natural Language Processing (NLP) has recently experienced a tremendous amount of development. Despite these advancements, the majority of research is focused on established well-resourced languages, such as English. To contribute to development of the low resource Slovak language, we introduce SlovakSum, a Slovak news summarization dataset consisting of over 200 thousand news articles with titles and short abstracts obtained from multiple Slovak newspapers. The abstractive approach, including MBART and mT5 models, was used to evaluate various baselines. The code for the reproduction of our dataset and experiments can be found at https://github.com/NaiveNeuron/slovaksum</abstract>
<identifier type="citekey">ondrejova-suppa-2024-slovaksum</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.1298</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>14916</start>
<end>14922</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SlovakSum: A Large Scale Slovak Summarization Dataset
%A Ondrejova, Viktoria
%A Suppa, Marek
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F ondrejova-suppa-2024-slovaksum
%X The ability to automatically summarize news articles has become increasingly important due to the vast amount of information available online. Together with the rise of chatbots , Natural Language Processing (NLP) has recently experienced a tremendous amount of development. Despite these advancements, the majority of research is focused on established well-resourced languages, such as English. To contribute to development of the low resource Slovak language, we introduce SlovakSum, a Slovak news summarization dataset consisting of over 200 thousand news articles with titles and short abstracts obtained from multiple Slovak newspapers. The abstractive approach, including MBART and mT5 models, was used to evaluate various baselines. The code for the reproduction of our dataset and experiments can be found at https://github.com/NaiveNeuron/slovaksum
%U https://aclanthology.org/2024.lrec-main.1298
%P 14916-14922
Markdown (Informal)
[SlovakSum: A Large Scale Slovak Summarization Dataset](https://aclanthology.org/2024.lrec-main.1298) (Ondrejova & Suppa, LREC-COLING 2024)
ACL
- Viktoria Ondrejova and Marek Suppa. 2024. SlovakSum: A Large Scale Slovak Summarization Dataset. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 14916–14922, Torino, Italia. ELRA and ICCL.