@inproceedings{kolding-etal-2023-dansumt5,
  title     = {{D}an{S}um{T}5: Automatic Abstractive Summarization for {D}anish},
  author    = {Kolding, Sara and
               Nymann, Katrine and
               Hansen, Ida and
               Enevoldsen, Kenneth and
               Kristensen-McLachlan, Ross},
  editor    = {Alum{\"a}e, Tanel and
               Fishel, Mark},
  booktitle = {Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)},
  month     = may,
  year      = {2023},
  address   = {T{\'o}rshavn, Faroe Islands},
  publisher = {University of Tartu Library},
  url       = {https://aclanthology.org/2023.nodalida-1.25/},
  pages     = {248--264},
  abstract  = {Automatic abstractive text summarization is a challenging task in the field of natural language processing. This paper presents a model for domain-specific summarization for Danish news articles, DanSumT5; an mT5 model fine-tuned on a cleaned subset of the DaNewsroom dataset consisting of abstractive summary-article pairs. The resulting state-of-the-art model is evaluated both quantitatively and qualitatively, using ROUGE and BERTScore metrics and human rankings of the summaries. We find that although model refinements increase quantitative and qualitative performance, the model is still prone to factual errors. We discuss the limitations of current evaluation methods for automatic abstractive summarization and underline the need for improved metrics and transparency within the field. We suggest that future work should employ methods for detecting and reducing errors in model output and methods for referenceless evaluation of summaries.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kolding-etal-2023-dansumt5">
<titleInfo>
<title>DanSumT5: Automatic Abstractive Summarization for Danish</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Kolding</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katrine</namePart>
<namePart type="family">Nymann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ida</namePart>
<namePart type="family">Hansen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kenneth</namePart>
<namePart type="family">Enevoldsen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ross</namePart>
<namePart type="family">Kristensen-McLachlan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tanel</namePart>
<namePart type="family">Alumäe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Fishel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>University of Tartu Library</publisher>
<place>
<placeTerm type="text">Tórshavn, Faroe Islands</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Automatic abstractive text summarization is a challenging task in the field of natural language processing. This paper presents a model for domain-specific summarization for Danish news articles, DanSumT5; an mT5 model fine-tuned on a cleaned subset of the DaNewsroom dataset consisting of abstractive summary-article pairs. The resulting state-of-the-art model is evaluated both quantitatively and qualitatively, using ROUGE and BERTScore metrics and human rankings of the summaries. We find that although model refinements increase quantitative and qualitative performance, the model is still prone to factual errors. We discuss the limitations of current evaluation methods for automatic abstractive summarization and underline the need for improved metrics and transparency within the field. We suggest that future work should employ methods for detecting and reducing errors in model output and methods for referenceless evaluation of summaries.</abstract>
<identifier type="citekey">kolding-etal-2023-dansumt5</identifier>
<location>
<url>https://aclanthology.org/2023.nodalida-1.25/</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>248</start>
<end>264</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DanSumT5: Automatic Abstractive Summarization for Danish
%A Kolding, Sara
%A Nymann, Katrine
%A Hansen, Ida
%A Enevoldsen, Kenneth
%A Kristensen-McLachlan, Ross
%Y Alumäe, Tanel
%Y Fishel, Mark
%S Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)
%D 2023
%8 May
%I University of Tartu Library
%C Tórshavn, Faroe Islands
%F kolding-etal-2023-dansumt5
%X Automatic abstractive text summarization is a challenging task in the field of natural language processing. This paper presents a model for domain-specific summarization for Danish news articles, DanSumT5; an mT5 model fine-tuned on a cleaned subset of the DaNewsroom dataset consisting of abstractive summary-article pairs. The resulting state-of-the-art model is evaluated both quantitatively and qualitatively, using ROUGE and BERTScore metrics and human rankings of the summaries. We find that although model refinements increase quantitative and qualitative performance, the model is still prone to factual errors. We discuss the limitations of current evaluation methods for automatic abstractive summarization and underline the need for improved metrics and transparency within the field. We suggest that future work should employ methods for detecting and reducing errors in model output and methods for referenceless evaluation of summaries.
%U https://aclanthology.org/2023.nodalida-1.25/
%P 248-264
Markdown (Informal)
[DanSumT5: Automatic Abstractive Summarization for Danish](https://aclanthology.org/2023.nodalida-1.25/) (Kolding et al., NoDaLiDa 2023)
ACL
- Sara Kolding, Katrine Nymann, Ida Hansen, Kenneth Enevoldsen, and Ross Kristensen-McLachlan. 2023. DanSumT5: Automatic Abstractive Summarization for Danish. In Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa), pages 248–264, Tórshavn, Faroe Islands. University of Tartu Library.