BibTeX
@inproceedings{sverrisson-einarsson-2023-abstractive,
title = "Abstractive Text Summarization for {I}celandic",
author = "Sverrisson, {\TH}{\'o}r and
Einarsson, Hafsteinn",
editor = {Alum{\"a}e, Tanel and
Fishel, Mark},
booktitle = "Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)",
month = may,
year = "2023",
address = "T{\'o}rshavn, Faroe Islands",
publisher = "University of Tartu Library",
url = "https://aclanthology.org/2023.nodalida-1.3/",
pages = "17--31",
abstract = "In this work, we studied methods for automatic abstractive summarization in a low-resource setting using Icelandic text, which is morphologically rich and has limited data compared to languages such as English. We collected and published the first publicly available abstractive summarization dataset for Icelandic and used it for training and evaluation of our models. We found that using multilingual pre-training in this setting led to improved performance, with the multilingual mT5 model consistently outperforming a similar model pre-trained from scratch on Icelandic text only. Additionally, we explored the use of machine translations for fine-tuning data augmentation and found that fine-tuning on the augmented data followed by fine-tuning on Icelandic data improved the results. This work highlights the importance of both high-quality training data and multilingual pre-training in achieving effective abstractive summarization in low-resource languages."
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sverrisson-einarsson-2023-abstractive">
<titleInfo>
<title>Abstractive Text Summarization for Icelandic</title>
</titleInfo>
<name type="personal">
<namePart type="given">\THór</namePart>
<namePart type="family">Sverrisson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hafsteinn</namePart>
<namePart type="family">Einarsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tanel</namePart>
<namePart type="family">Alumäe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Fishel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>University of Tartu Library</publisher>
<place>
<placeTerm type="text">Tórshavn, Faroe Islands</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this work, we studied methods for automatic abstractive summarization in a low-resource setting using Icelandic text, which is morphologically rich and has limited data compared to languages such as English. We collected and published the first publicly available abstractive summarization dataset for Icelandic and used it for training and evaluation of our models. We found that using multilingual pre-training in this setting led to improved performance, with the multilingual mT5 model consistently outperforming a similar model pre-trained from scratch on Icelandic text only. Additionally, we explored the use of machine translations for fine-tuning data augmentation and found that fine-tuning on the augmented data followed by fine-tuning on Icelandic data improved the results. This work highlights the importance of both high-quality training data and multilingual pre-training in achieving effective abstractive summarization in low-resource languages.</abstract>
<identifier type="citekey">sverrisson-einarsson-2023-abstractive</identifier>
<location>
<url>https://aclanthology.org/2023.nodalida-1.3/</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>17</start>
<end>31</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Abstractive Text Summarization for Icelandic
%A Sverrisson, Þór
%A Einarsson, Hafsteinn
%Y Alumäe, Tanel
%Y Fishel, Mark
%S Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)
%D 2023
%8 May
%I University of Tartu Library
%C Tórshavn, Faroe Islands
%F sverrisson-einarsson-2023-abstractive
%X In this work, we studied methods for automatic abstractive summarization in a low-resource setting using Icelandic text, which is morphologically rich and has limited data compared to languages such as English. We collected and published the first publicly available abstractive summarization dataset for Icelandic and used it for training and evaluation of our models. We found that using multilingual pre-training in this setting led to improved performance, with the multilingual mT5 model consistently outperforming a similar model pre-trained from scratch on Icelandic text only. Additionally, we explored the use of machine translations for fine-tuning data augmentation and found that fine-tuning on the augmented data followed by fine-tuning on Icelandic data improved the results. This work highlights the importance of both high-quality training data and multilingual pre-training in achieving effective abstractive summarization in low-resource languages.
%U https://aclanthology.org/2023.nodalida-1.3/
%P 17-31
Markdown (Informal)
[Abstractive Text Summarization for Icelandic](https://aclanthology.org/2023.nodalida-1.3/) (Sverrisson & Einarsson, NoDaLiDa 2023)
ACL
Þór Sverrisson and Hafsteinn Einarsson. 2023. Abstractive Text Summarization for Icelandic. In Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa), pages 17–31, Tórshavn, Faroe Islands. University of Tartu Library.
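
The abstract describes a two-stage fine-tuning recipe: fine-tune a multilingual mT5 checkpoint on machine-translated summarization data for augmentation, then continue fine-tuning on the native Icelandic dataset. Below is a minimal sketch of that recipe, assuming the HuggingFace transformers library and a datasets-style dataset with "article" and "summary" columns; the checkpoint name, column names, and hyperparameters are illustrative assumptions, not the authors' actual setup.

# A minimal sketch of the two-stage fine-tuning recipe from the abstract:
# stage 1 fine-tunes mT5 on machine-translated (augmented) summarization
# pairs, stage 2 continues fine-tuning the same weights on native Icelandic
# pairs. Checkpoint, column names, and hyperparameters are assumptions for
# illustration, not the paper's actual configuration.
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    DataCollatorForSeq2Seq,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
)

tokenizer = AutoTokenizer.from_pretrained("google/mt5-base")
model = AutoModelForSeq2SeqLM.from_pretrained("google/mt5-base")


def preprocess(batch):
    # Tokenize source articles and reference summaries; the "article" and
    # "summary" column names and the length limits are assumptions.
    inputs = tokenizer(batch["article"], max_length=1024, truncation=True)
    labels = tokenizer(text_target=batch["summary"], max_length=128, truncation=True)
    inputs["labels"] = labels["input_ids"]
    return inputs


def finetune(dataset, output_dir):
    # One fine-tuning stage; calling this twice on the same `model` object
    # realizes the sequential (augmented data, then Icelandic data) schedule.
    args = Seq2SeqTrainingArguments(
        output_dir=output_dir,
        learning_rate=1e-4,
        num_train_epochs=3,
        per_device_train_batch_size=8,
    )
    trainer = Seq2SeqTrainer(
        model=model,
        args=args,
        train_dataset=dataset.map(
            preprocess, batched=True, remove_columns=dataset.column_names
        ),
        data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),
    )
    trainer.train()


# Stage 1: machine-translated summarization pairs (hypothetical dataset).
# finetune(translated_dataset, "mt5-summarization-stage1")
# Stage 2: the smaller native Icelandic dataset (hypothetical dataset).
# finetune(icelandic_dataset, "mt5-summarization-stage2")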