% BibTeX record for the workshop paper published at https://aclanthology.org/2026.loreslm-1.30/
% Case-sensitive words in title/booktitle are protected as whole words so that
% sentence-casing styles keep them intact without breaking kerning or hyphenation.
@inproceedings{perovic-mihajlov-2026-serbian,
    title     = {{Serbian} {SuperGLUE}: Towards an Evaluation Benchmark for {South} {Slavic} Language Models},
    author    = {Perovic, Mitar and
                 Mihajlov, Teodora},
    editor    = {Hettiarachchi, Hansi and
                 Ranasinghe, Tharindu and
                 Plum, Alistair and
                 Rayson, Paul and
                 Mitkov, Ruslan and
                 Gaber, Mohamed and
                 Premasiri, Damith and
                 Tan, Fiona Anting and
                 Uyangodage, Lasitha},
    booktitle = {Proceedings of the Second Workshop on Language Models for Low-Resource Languages ({LoResLM} 2026)},
    month     = mar,
    year      = {2026},
    address   = {Rabat, Morocco},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.loreslm-1.30/},
    pages     = {347--361},
    isbn      = {979-8-89176-377-7},
    abstract  = {We introduce Serbian SuperGLUE, a comprehensive benchmark for evaluating natural language understanding in Serbian, adapted from the English SuperGLUE benchmark. The benchmark comprises seven tasks spanning question answering, natural language inference, and coreference resolution, created through a combination of LLM-based translation with automatic post-editing and native data generation. We evaluate seven encoder-based language models, including both Serbian-specific (BERTi{\'c}, Jerteh) and multilingual models (mmBERT, XLM-RoBERTa variants). Our results reveal that multilingual models remain competitive with language-specific alternatives, with mmBERT achieving the best performance on RTE (75.7{\%}) and XLM-R-BERTi{\'c} leading on BoolQ (82.0{\%}). We observe significant training variance on smaller datasets, with standard deviations exceeding 10{\%} in some configurations, highlighting the importance of multi-seed evaluation for low-resource benchmarking. We release the benchmark, evaluation code, and model checkpoints to facilitate reproducible research on South Slavic language understanding.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper "Serbian SuperGLUE" (citekey perovic-mihajlov-2026-serbian). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="perovic-mihajlov-2026-serbian">
<titleInfo>
<title>Serbian SuperGLUE: Towards an Evaluation Benchmark for South Slavic Language Models</title>
</titleInfo>
<!-- Paper authors: each personal name carries the role term "author". -->
<name type="personal">
<namePart type="given">Mitar</namePart>
<namePart type="family">Perovic</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Teodora</namePart>
<namePart type="family">Mihajlov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume, with its editors, publisher, place, genre and ISBN. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Language Models for Low-Resource Languages (LoResLM 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hansi</namePart>
<namePart type="family">Hettiarachchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tharindu</namePart>
<namePart type="family">Ranasinghe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alistair</namePart>
<namePart type="family">Plum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Rayson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Gaber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Damith</namePart>
<namePart type="family">Premasiri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<!-- Two given-name parts are recorded separately for this editor. -->
<namePart type="given">Fiona</namePart>
<namePart type="given">Anting</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lasitha</namePart>
<namePart type="family">Uyangodage</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-377-7</identifier>
</relatedItem>
<abstract>We introduce Serbian SuperGLUE, a comprehensive benchmark for evaluating natural language understanding in Serbian, adapted from the English SuperGLUE benchmark. The benchmark comprises seven tasks spanning question answering, natural language inference, and coreference resolution, created through a combination of LLM-based translation with automatic post-editing and native data generation. We evaluate seven encoder-based language models, including both Serbian-specific (BERTić, Jerteh) and multilingual models (mmBERT, XLM-RoBERTa variants). Our results reveal that multilingual models remain competitive with language-specific alternatives, with mmBERT achieving the best performance on RTE (75.7%) and XLM-R-BERTić leading on BoolQ (82.0%). We observe significant training variance on smaller datasets, with standard deviations exceeding 10% in some configurations, highlighting the importance of multi-seed evaluation for low-resource benchmarking. We release the benchmark, evaluation code, and model checkpoints to facilitate reproducible research on South Slavic language understanding.</abstract>
<identifier type="citekey">perovic-mihajlov-2026-serbian</identifier>
<location>
<url>https://aclanthology.org/2026.loreslm-1.30/</url>
</location>
<!-- Part details for this paper: a date and the page extent (start and end page). -->
<part>
<date>2026-03</date>
<extent unit="page">
<start>347</start>
<end>361</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Serbian SuperGLUE: Towards an Evaluation Benchmark for South Slavic Language Models
%A Perovic, Mitar
%A Mihajlov, Teodora
%Y Hettiarachchi, Hansi
%Y Ranasinghe, Tharindu
%Y Plum, Alistair
%Y Rayson, Paul
%Y Mitkov, Ruslan
%Y Gaber, Mohamed
%Y Premasiri, Damith
%Y Tan, Fiona Anting
%Y Uyangodage, Lasitha
%S Proceedings of the Second Workshop on Language Models for Low-Resource Languages (LoResLM 2026)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-377-7
%F perovic-mihajlov-2026-serbian
%X We introduce Serbian SuperGLUE, a comprehensive benchmark for evaluating natural language understanding in Serbian, adapted from the English SuperGLUE benchmark. The benchmark comprises seven tasks spanning question answering, natural language inference, and coreference resolution, created through a combination of LLM-based translation with automatic post-editing and native data generation. We evaluate seven encoder-based language models, including both Serbian-specific (BERTić, Jerteh) and multilingual models (mmBERT, XLM-RoBERTa variants). Our results reveal that multilingual models remain competitive with language-specific alternatives, with mmBERT achieving the best performance on RTE (75.7%) and XLM-R-BERTić leading on BoolQ (82.0%). We observe significant training variance on smaller datasets, with standard deviations exceeding 10% in some configurations, highlighting the importance of multi-seed evaluation for low-resource benchmarking. We release the benchmark, evaluation code, and model checkpoints to facilitate reproducible research on South Slavic language understanding.
%U https://aclanthology.org/2026.loreslm-1.30/
%P 347-361
Markdown (Informal)
[Serbian SuperGLUE: Towards an Evaluation Benchmark for South Slavic Language Models](https://aclanthology.org/2026.loreslm-1.30/) (Perovic & Mihajlov, LoResLM 2026)
ACL