@inproceedings{vejlgaard-holm-etal-2025-danoliteracy,
  title     = {{Danoliteracy} of Generative Large Language Models},
  author    = {Vejlgaard Holm, S{\o}ren and
               Hansen, Lars Kai and
               Nielsen, Martin Carsten},
  editor    = {Johansson, Richard and
               Stymne, Sara},
  booktitle = {Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies ({NoDaLiDa}/{Baltic-HLT} 2025)},
  month     = mar,
  year      = {2025},
  address   = {Tallinn, Estonia},
  publisher = {University of Tartu Library},
  url       = {https://aclanthology.org/2025.nodalida-1.78/},
  pages     = {785--800},
  isbn      = {978-9908-53-109-0},
  abstract  = {The language technology moonshot moment of Generative Large Language Models (GLLMs) was not limited to English: These models brought a surge of technological applications, investments, and hype to low-resource languages as well. However, the capabilities of these models in languages such as Danish were, until recently, difficult to verify beyond qualitative demonstrations due to a lack of applicable evaluation corpora. We present a GLLM benchmark to evaluate Danoliteracy, a measure of Danish language and cultural competency across eight diverse scenarios such as Danish citizenship tests and abstractive social media question answering. This limited-size benchmark was found to produce a robust ranking that correlates to human feedback at $\rho \sim 0.8$ with GPT-4 and Claude Opus models achieving the highest rankings. Analyzing these model results across scenarios, we find one strong underlying factor explaining 95{\%} of scenario performance variance for GLLMs in Danish, suggesting a $g$ factor of model consistency in language adaptation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vejlgaard-holm-etal-2025-danoliteracy">
<titleInfo>
<title>Danoliteracy of Generative Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Søren</namePart>
<namePart type="family">Vejlgaard Holm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lars</namePart>
<namePart type="given">Kai</namePart>
<namePart type="family">Hansen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martin</namePart>
<namePart type="given">Carsten</namePart>
<namePart type="family">Nielsen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Johansson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Stymne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>University of Tartu Library</publisher>
<place>
<placeTerm type="text">Tallinn, Estonia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">978-9908-53-109-0</identifier>
</relatedItem>
<abstract>The language technology moonshot moment of Generative Large Language Models (GLLMs) was not limited to English: These models brought a surge of technological applications, investments, and hype to low-resource languages as well. However, the capabilities of these models in languages such as Danish were, until recently, difficult to verify beyond qualitative demonstrations due to a lack of applicable evaluation corpora. We present a GLLM benchmark to evaluate Danoliteracy, a measure of Danish language and cultural competency across eight diverse scenarios such as Danish citizenship tests and abstractive social media question answering. This limited-size benchmark was found to produce a robust ranking that correlates to human feedback at ρ ∼ 0.8 with GPT-4 and Claude Opus models achieving the highest rankings. Analyzing these model results across scenarios, we find one strong underlying factor explaining 95% of scenario performance variance for GLLMs in Danish, suggesting a g factor of model consistency in language adaptation.</abstract>
<identifier type="citekey">vejlgaard-holm-etal-2025-danoliteracy</identifier>
<location>
<url>https://aclanthology.org/2025.nodalida-1.78/</url>
</location>
<part>
<date>2025-03</date>
<extent unit="page">
<start>785</start>
<end>800</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Danoliteracy of Generative Large Language Models
%A Vejlgaard Holm, Søren
%A Hansen, Lars Kai
%A Nielsen, Martin Carsten
%Y Johansson, Richard
%Y Stymne, Sara
%S Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)
%D 2025
%8 March
%I University of Tartu Library
%C Tallinn, Estonia
%@ 978-9908-53-109-0
%F vejlgaard-holm-etal-2025-danoliteracy
%X The language technology moonshot moment of Generative Large Language Models (GLLMs) was not limited to English: These models brought a surge of technological applications, investments, and hype to low-resource languages as well. However, the capabilities of these models in languages such as Danish were, until recently, difficult to verify beyond qualitative demonstrations due to a lack of applicable evaluation corpora. We present a GLLM benchmark to evaluate Danoliteracy, a measure of Danish language and cultural competency across eight diverse scenarios such as Danish citizenship tests and abstractive social media question answering. This limited-size benchmark was found to produce a robust ranking that correlates to human feedback at ρ ∼ 0.8 with GPT-4 and Claude Opus models achieving the highest rankings. Analyzing these model results across scenarios, we find one strong underlying factor explaining 95% of scenario performance variance for GLLMs in Danish, suggesting a g factor of model consistency in language adaptation.
%U https://aclanthology.org/2025.nodalida-1.78/
%P 785-800
Markdown (Informal)
[Danoliteracy of Generative Large Language Models](https://aclanthology.org/2025.nodalida-1.78/) (Vejlgaard Holm et al., NoDaLiDa 2025)
ACL
- Søren Vejlgaard Holm, Lars Kai Hansen, and Martin Carsten Nielsen. 2025. Danoliteracy of Generative Large Language Models. In Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025), pages 785–800, Tallinn, Estonia. University of Tartu Library.