@inproceedings{kurfali-etal-2025-swesat,
title = "{SweSAT}-1.0: {The} {Swedish} {U}niversity Entrance Exam as a Benchmark for Large Language Models",
author = {Kurfal{\i}, Murathan and
Zahra, Shorouq and
Gogoulou, Evangelia and
D{\"u}rlich, Luise and
Carlsson, Fredrik and
Nivre, Joakim},
editor = "Johansson, Richard and
Stymne, Sara",
booktitle = "Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)",
month = mar,
year = "2025",
address = "Tallinn, Estonia",
publisher = "University of Tartu Library",
url = "https://aclanthology.org/2025.nodalida-1.36/",
pages = "331--339",
ISBN = "978-9908-53-109-0",
abstract = {This introduces SweSAT-1.0, a new benchmark dataset created from the Swedish university entrance exam (H{\"o}gskoleprovet) to assess large language models in Swedish. The current version of the benchmark includes 867 questions across six different tasks, including reading comprehension, mathematical problem solving, and logical reasoning. We find that some widely used open-source and commercial models excel in verbal tasks, but we also see that all models, even the commercial ones, struggle with reasoning tasks in Swedish. We hope that SweSAT-1.0 will facilitate research on large language models for Swedish by enriching the breadth of available tasks, offering a challenging evaluation benchmark that is free from any translation biases.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kurfali-etal-2025-swesat">
<titleInfo>
<title>SweSAT-1.0: The Swedish University Entrance Exam as a Benchmark for Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Murathan</namePart>
<namePart type="family">Kurfalı</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shorouq</namePart>
<namePart type="family">Zahra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Evangelia</namePart>
<namePart type="family">Gogoulou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luise</namePart>
<namePart type="family">Dürlich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fredrik</namePart>
<namePart type="family">Carlsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joakim</namePart>
<namePart type="family">Nivre</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Johansson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Stymne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>University of Tartu Library</publisher>
<place>
<placeTerm type="text">Tallinn, Estonia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">978-9908-53-109-0</identifier>
</relatedItem>
<abstract>This introduces SweSAT-1.0, a new benchmark dataset created from the Swedish university entrance exam (Högskoleprovet) to assess large language models in Swedish. The current version of the benchmark includes 867 questions across six different tasks, including reading comprehension, mathematical problem solving, and logical reasoning. We find that some widely used open-source and commercial models excel in verbal tasks, but we also see that all models, even the commercial ones, struggle with reasoning tasks in Swedish. We hope that SweSAT-1.0 will facilitate research on large language models for Swedish by enriching the breadth of available tasks, offering a challenging evaluation benchmark that is free from any translation biases.</abstract>
<identifier type="citekey">kurfali-etal-2025-swesat</identifier>
<location>
<url>https://aclanthology.org/2025.nodalida-1.36/</url>
</location>
<part>
<date>2025-03</date>
<extent unit="page">
<start>331</start>
<end>339</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SweSAT-1.0: The Swedish University Entrance Exam as a Benchmark for Large Language Models
%A Kurfalı, Murathan
%A Zahra, Shorouq
%A Gogoulou, Evangelia
%A Dürlich, Luise
%A Carlsson, Fredrik
%A Nivre, Joakim
%Y Johansson, Richard
%Y Stymne, Sara
%S Proceedings of the Joint 25th Nordic Conference on Computational Linguistics and 11th Baltic Conference on Human Language Technologies (NoDaLiDa/Baltic-HLT 2025)
%D 2025
%8 March
%I University of Tartu Library
%C Tallinn, Estonia
%@ 978-9908-53-109-0
%F kurfali-etal-2025-swesat
%X This introduces SweSAT-1.0, a new benchmark dataset created from the Swedish university entrance exam (Högskoleprovet) to assess large language models in Swedish. The current version of the benchmark includes 867 questions across six different tasks, including reading comprehension, mathematical problem solving, and logical reasoning. We find that some widely used open-source and commercial models excel in verbal tasks, but we also see that all models, even the commercial ones, struggle with reasoning tasks in Swedish. We hope that SweSAT-1.0 will facilitate research on large language models for Swedish by enriching the breadth of available tasks, offering a challenging evaluation benchmark that is free from any translation biases.
%U https://aclanthology.org/2025.nodalida-1.36/
%P 331-339
Markdown (Informal)
[SweSAT-1.0: The Swedish University Entrance Exam as a Benchmark for Large Language Models](https://aclanthology.org/2025.nodalida-1.36/) (Kurfalı et al., NoDaLiDa 2025)
ACL