@inproceedings{ekgren-etal-2022-lessons,
title = "Lessons Learned from {GPT}-{SW}3: Building the First Large-Scale Generative Language Model for {S}wedish",
author = {Ekgren, Ariel and
Cuba Gyllensten, Amaru and
Gogoulou, Evangelia and
Heiman, Alice and
Verlinden, Severine and
{\"O}hman, Joey and
Carlsson, Fredrik and
Sahlgren, Magnus},
booktitle = "Proceedings of the Thirteenth Language Resources and Evaluation Conference",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.lrec-1.376",
pages = "3509--3518",
abstract = "We present GTP-SW3, a 3.5 billion parameter autoregressive language model, trained on a newly created 100 GB Swedish corpus. This paper provides insights with regards to data collection and training, while highlights the challenges of proper model evaluation. The results of quantitive evaluation through perplexity indicate that GPT-SW3 is a competent model in comparison with existing autoregressive models of similar size. Additionally, we perform an extensive prompting study which reveals the good text generation capabilities of GTP-SW3.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ekgren-etal-2022-lessons">
<titleInfo>
<title>Lessons Learned from GPT-SW3: Building the First Large-Scale Generative Language Model for Swedish</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ariel</namePart>
<namePart type="family">Ekgren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amaru</namePart>
<namePart type="family">Cuba Gyllensten</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Evangelia</namePart>
<namePart type="family">Gogoulou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alice</namePart>
<namePart type="family">Heiman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Severine</namePart>
<namePart type="family">Verlinden</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joey</namePart>
<namePart type="family">Öhman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fredrik</namePart>
<namePart type="family">Carlsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Magnus</namePart>
<namePart type="family">Sahlgren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Thirteenth Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present GPT-SW3, a 3.5-billion-parameter autoregressive language model, trained on a newly created 100 GB Swedish corpus. This paper provides insights into data collection and training, while highlighting the challenges of proper model evaluation. The results of quantitative evaluation through perplexity indicate that GPT-SW3 is a competent model in comparison with existing autoregressive models of similar size. Additionally, we perform an extensive prompting study that reveals the strong text generation capabilities of GPT-SW3.</abstract>
<identifier type="citekey">ekgren-etal-2022-lessons</identifier>
<location>
<url>https://aclanthology.org/2022.lrec-1.376</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>3509</start>
<end>3518</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Lessons Learned from GPT-SW3: Building the First Large-Scale Generative Language Model for Swedish
%A Ekgren, Ariel
%A Cuba Gyllensten, Amaru
%A Gogoulou, Evangelia
%A Heiman, Alice
%A Verlinden, Severine
%A Öhman, Joey
%A Carlsson, Fredrik
%A Sahlgren, Magnus
%S Proceedings of the Thirteenth Language Resources and Evaluation Conference
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F ekgren-etal-2022-lessons
%X We present GPT-SW3, a 3.5-billion-parameter autoregressive language model, trained on a newly created 100 GB Swedish corpus. This paper provides insights into data collection and training, while highlighting the challenges of proper model evaluation. The results of quantitative evaluation through perplexity indicate that GPT-SW3 is a competent model in comparison with existing autoregressive models of similar size. Additionally, we perform an extensive prompting study that reveals the strong text generation capabilities of GPT-SW3.
%U https://aclanthology.org/2022.lrec-1.376
%P 3509-3518
Markdown (Informal)
[Lessons Learned from GPT-SW3: Building the First Large-Scale Generative Language Model for Swedish](https://aclanthology.org/2022.lrec-1.376) (Ekgren et al., LREC 2022)
ACL
Ariel Ekgren, Amaru Cuba Gyllensten, Evangelia Gogoulou, Alice Heiman, Severine Verlinden, Joey Öhman, Fredrik Carlsson, and Magnus Sahlgren. 2022. Lessons Learned from GPT-SW3: Building the First Large-Scale Generative Language Model for Swedish. In Proceedings of the Thirteenth Language Resources and Evaluation Conference, pages 3509–3518, Marseille, France. European Language Resources Association.