% GPT-SW3 conference paper in the LREC-COLING 2024 proceedings.
% Case-sensitive words in the title are brace-protected as whole words so that
% sentence-casing styles keep them intact without disturbing kerning.
@inproceedings{ekgren-etal-2024-gpt,
  title     = {{GPT-SW3}: An Autoregressive Language Model for the {Scandinavian} Languages},
  author    = {Ekgren, Ariel and
               Cuba Gyllensten, Amaru and
               Stollenwerk, Felix and
               {\"O}hman, Joey and
               Isbister, Tim and
               Gogoulou, Evangelia and
               Carlsson, Fredrik and
               Casademont, Judit and
               Sahlgren, Magnus},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.695/},
  pages     = {7886--7900},
  abstract  = {This paper details the process of developing the first native large generative language model for the North Germanic languages, GPT-SW3. We cover all parts of the development process, from data collection and processing, training configuration and instruction finetuning, to evaluation, applications, and considerations for release strategies. We discuss pros and cons of developing large language models for smaller languages and in relatively peripheral regions of the globe, and we hope that this paper can serve as a guide and reference for other researchers that undertake the development of large generative models for smaller languages.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record describing the paper with citekey ekgren-etal-2024-gpt. -->
  <mods ID="ekgren-etal-2024-gpt">
    <titleInfo>
      <title>GPT-SW3: An Autoregressive Language Model for the Scandinavian Languages</title>
    </titleInfo>

    <!-- Authors of the paper (nine personal names). -->
    <name type="personal">
      <namePart type="given">Ariel</namePart>
      <namePart type="family">Ekgren</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Amaru</namePart>
      <namePart type="family">Cuba Gyllensten</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Felix</namePart>
      <namePart type="family">Stollenwerk</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Joey</namePart>
      <namePart type="family">Öhman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tim</namePart>
      <namePart type="family">Isbister</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Evangelia</namePart>
      <namePart type="family">Gogoulou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Fredrik</namePart>
      <namePart type="family">Carlsson</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Judit</namePart>
      <namePart type="family">Casademont</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Magnus</namePart>
      <namePart type="family">Sahlgren</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2024-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Min-Yen</namePart>
        <namePart type="family">Kan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Veronique</namePart>
        <namePart type="family">Hoste</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sakriani</namePart>
        <namePart type="family">Sakti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nianwen</namePart>
        <namePart type="family">Xue</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>ELRA and ICCL</publisher>
        <place>
          <placeTerm type="text">Torino, Italia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>This paper details the process of developing the first native large generative language model for the North Germanic languages, GPT-SW3. We cover all parts of the development process, from data collection and processing, training configuration and instruction finetuning, to evaluation, applications, and considerations for release strategies. We discuss pros and cons of developing large language models for smaller languages and in relatively peripheral regions of the globe, and we hope that this paper can serve as a guide and reference for other researchers that undertake the development of large generative models for smaller languages.</abstract>
    <identifier type="citekey">ekgren-etal-2024-gpt</identifier>
    <location>
      <url>https://aclanthology.org/2024.lrec-main.695/</url>
    </location>

    <!-- Position of the paper inside the host volume: issue date and page range. -->
    <part>
      <date>2024-05</date>
      <extent unit="page">
        <start>7886</start>
        <end>7900</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T GPT-SW3: An Autoregressive Language Model for the Scandinavian Languages
%A Ekgren, Ariel
%A Cuba Gyllensten, Amaru
%A Stollenwerk, Felix
%A Öhman, Joey
%A Isbister, Tim
%A Gogoulou, Evangelia
%A Carlsson, Fredrik
%A Casademont, Judit
%A Sahlgren, Magnus
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F ekgren-etal-2024-gpt
%X This paper details the process of developing the first native large generative language model for the North Germanic languages, GPT-SW3. We cover all parts of the development process, from data collection and processing, training configuration and instruction finetuning, to evaluation, applications, and considerations for release strategies. We discuss pros and cons of developing large language models for smaller languages and in relatively peripheral regions of the globe, and we hope that this paper can serve as a guide and reference for other researchers that undertake the development of large generative models for smaller languages.
%U https://aclanthology.org/2024.lrec-main.695/
%P 7886-7900
Markdown (Informal)
[GPT-SW3: An Autoregressive Language Model for the Scandinavian Languages](https://aclanthology.org/2024.lrec-main.695/) (Ekgren et al., LREC-COLING 2024)
ACL
- Ariel Ekgren, Amaru Cuba Gyllensten, Felix Stollenwerk, Joey Öhman, Tim Isbister, Evangelia Gogoulou, Fredrik Carlsson, Judit Casademont, and Magnus Sahlgren. 2024. GPT-SW3: An Autoregressive Language Model for the Scandinavian Languages. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 7886–7900, Torino, Italia. ELRA and ICCL.