% Conference-paper record for "KazBench-KK: A Cultural-Knowledge Benchmark
% for Kazakh" (workshop proceedings; URL points to aclanthology.org).
% Case-sensitive words in title/booktitle are protected as whole words so
% that sentence-casing styles keep them intact.
% NOTE(review): `address` is kept exactly as in the original record;
% presumably it is the workshop venue rather than the publisher's city --
% confirm before using with styles that print it as the publisher location.
@inproceedings{umbet-etal-2025-kazbench,
  title     = {{KazBench-KK}: A Cultural-Knowledge Benchmark for {Kazakh}},
  author    = {Umbet, Sanzhar and
               Murzakhmetov, Sanzhar and
               Sagyndyk, Beksultan and
               Yakunin, Kirill and
               Akishev, Timur and
               Zubitski, Pavel},
  editor    = {Le Ferrand, {\'E}ric and
               Klyachko, Elena and
               Postnikova, Anna and
               Shavrina, Tatiana and
               Serikov, Oleg and
               Voloshina, Ekaterina and
               Vylomova, Ekaterina},
  booktitle = {Proceedings of the Fourth Workshop on {NLP} Applications to Field Linguistics},
  month     = aug,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.fieldmatters-1.4/},
  pages     = {38--57},
  isbn      = {979-8-89176-282-4},
  abstract  = {We introduce KazBench-KK, a comprehensive 7,111-question multiple-choice benchmark designed to assess large language models' understanding of culturally grounded Kazakh knowledge. By combining expert-curated topics with LLM-assisted web mining, we create a diverse dataset spanning 17 culturally salient domains, including pastoral traditions, social hierarchies, and contemporary politics. Beyond evaluation, KazBench-KK serves as a practical tool for field linguists, enabling rapid lexical elicitation, glossing, and topic prioritization. Our benchmarking of various open-source LLMs reveals that reinforcement-tuned models outperform others, but smaller, domain-focused fine-tunes can rival larger models in specific cultural contexts.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the paper identified by citekey umbet-etal-2025-kazbench. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="umbet-etal-2025-kazbench">
    <!-- Title of the paper -->
    <titleInfo>
      <title>KazBench-KK: A Cultural-Knowledge Benchmark for Kazakh</title>
    </titleInfo>
    <!-- Authors, in the order they are listed -->
    <name type="personal">
      <namePart type="given">Sanzhar</namePart>
      <namePart type="family">Umbet</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sanzhar</namePart>
      <namePart type="family">Murzakhmetov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Beksultan</namePart>
      <namePart type="family">Sagyndyk</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kirill</namePart>
      <namePart type="family">Yakunin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Timur</namePart>
      <namePart type="family">Akishev</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pavel</namePart>
      <namePart type="family">Zubitski</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <!-- Issue date of the paper (year and month) -->
    <originInfo>
      <dateIssued>2025-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume that contains the paper -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Fourth Workshop on NLP Applications to Field Linguistics</title>
      </titleInfo>
      <!-- Editors of the proceedings, in the order they are listed -->
      <name type="personal">
        <namePart type="given">Éric</namePart>
        <namePart type="family">Le Ferrand</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Elena</namePart>
        <namePart type="family">Klyachko</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Postnikova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tatiana</namePart>
        <namePart type="family">Shavrina</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Oleg</namePart>
        <namePart type="family">Serikov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Voloshina</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Vylomova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <!-- Publisher and place recorded for the proceedings -->
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-282-4</identifier>
    </relatedItem>
    <abstract>We introduce KazBench-KK, a comprehensive 7,111-question multiple-choice benchmark designed to assess large language models’ understanding of culturally grounded Kazakh knowledge. By combining expert-curated topics with LLM-assisted web mining, we create a diverse dataset spanning 17 culturally salient domains, including pastoral traditions, social hierarchies, and contemporary politics. Beyond evaluation, KazBench-KK serves as a practical tool for field linguists, enabling rapid lexical elicitation, glossing, and topic prioritization. Our benchmarking of various open-source LLMs reveals that reinforcement-tuned models outperform others, but smaller, domain-focused fine-tunes can rival larger models in specific cultural contexts.</abstract>
    <!-- Citation key and landing-page URL -->
    <identifier type="citekey">umbet-etal-2025-kazbench</identifier>
    <location>
      <url>https://aclanthology.org/2025.fieldmatters-1.4/</url>
    </location>
    <!-- Position within the host volume: date and page extent -->
    <part>
      <date>2025-08</date>
      <extent unit="page">
        <start>38</start>
        <end>57</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T KazBench-KK: A Cultural-Knowledge Benchmark for Kazakh
%A Umbet, Sanzhar
%A Murzakhmetov, Sanzhar
%A Sagyndyk, Beksultan
%A Yakunin, Kirill
%A Akishev, Timur
%A Zubitski, Pavel
%Y Le Ferrand, Éric
%Y Klyachko, Elena
%Y Postnikova, Anna
%Y Shavrina, Tatiana
%Y Serikov, Oleg
%Y Voloshina, Ekaterina
%Y Vylomova, Ekaterina
%S Proceedings of the Fourth Workshop on NLP Applications to Field Linguistics
%D 2025
%8 August
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-282-4
%F umbet-etal-2025-kazbench
%X We introduce KazBench-KK, a comprehensive 7,111-question multiple-choice benchmark designed to assess large language models’ understanding of culturally grounded Kazakh knowledge. By combining expert-curated topics with LLM-assisted web mining, we create a diverse dataset spanning 17 culturally salient domains, including pastoral traditions, social hierarchies, and contemporary politics. Beyond evaluation, KazBench-KK serves as a practical tool for field linguists, enabling rapid lexical elicitation, glossing, and topic prioritization. Our benchmarking of various open-source LLMs reveals that reinforcement-tuned models outperform others, but smaller, domain-focused fine-tunes can rival larger models in specific cultural contexts.
%U https://aclanthology.org/2025.fieldmatters-1.4/
%P 38-57
Markdown (Informal)
[KazBench-KK: A Cultural-Knowledge Benchmark for Kazakh](https://aclanthology.org/2025.fieldmatters-1.4/) (Umbet et al., FieldMatters 2025)
ACL
- Sanzhar Umbet, Sanzhar Murzakhmetov, Beksultan Sagyndyk, Kirill Yakunin, Timur Akishev, and Pavel Zubitski. 2025. KazBench-KK: A Cultural-Knowledge Benchmark for Kazakh. In Proceedings of the Fourth Workshop on NLP Applications to Field Linguistics, pages 38–57, Vienna, Austria. Association for Computational Linguistics.