@inproceedings{hosseinbeigi-etal-2025-advancing,
  title     = {Advancing {Persian} {LLM} Evaluation},
  author    = {Hosseinbeigi, Sara Bourbour and
               Rohani, Behnam and
               Masoudi, Mostafa and
               Shamsfard, Mehrnoush and
               Saaberi, Zahra and
               Manesh, Mostafa Karimi and
               Abbasi, Mohammad Amin},
  editor    = {Chiruzzo, Luis and
               Ritter, Alan and
               Wang, Lu},
  booktitle = {Findings of the Association for Computational Linguistics: NAACL 2025},
  month     = apr,
  year      = {2025},
  address   = {Albuquerque, New Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.findings-naacl.147/},
  doi       = {10.18653/v1/2025.findings-naacl.147},
  pages     = {2711--2727},
  isbn      = {979-8-89176-195-7},
  abstract  = {Evaluation of large language models (LLMs) in low-resource languages like Persian has received less attention than in high-resource languages like English. Existing evaluation approaches for Persian LLMs generally lack comprehensive frameworks, limiting their ability to assess models' performance over a wide range of tasks requiring considerable cultural and contextual knowledge, as well as a deeper understanding of Persian literature and style. This paper first aims to fill this gap by providing two new benchmarks, PeKA and PK-BETS, on topics such as history, literature, and cultural knowledge, as well as challenging the present state-of-the-art models' abilities in a variety of Persian language comprehension tasks. These datasets are meant to reduce data contamination while providing an accurate assessment of Persian LLMs. The second aim of this paper is the general evaluation of LLMs across the current Persian benchmarks to provide a comprehensive performance overview. By offering a structured evaluation methodology, we hope to promote the examination of LLMs in the Persian language.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hosseinbeigi-etal-2025-advancing">
<titleInfo>
<title>Advancing Persian LLM Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="given">Bourbour</namePart>
<namePart type="family">Hosseinbeigi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Behnam</namePart>
<namePart type="family">Rohani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mostafa</namePart>
<namePart type="family">Masoudi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mehrnoush</namePart>
<namePart type="family">Shamsfard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zahra</namePart>
<namePart type="family">Saaberi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mostafa</namePart>
<namePart type="given">Karimi</namePart>
<namePart type="family">Manesh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Amin</namePart>
<namePart type="family">Abbasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: NAACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Chiruzzo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-195-7</identifier>
</relatedItem>
<abstract>Evaluation of large language models (LLMs) in low-resource languages like Persian has received less attention than in high-resource languages like English. Existing evaluation approaches for Persian LLMs generally lack comprehensive frameworks, limiting their ability to assess models’ performance over a wide range of tasks requiring considerable cultural and contextual knowledge, as well as a deeper understanding of Persian literature and style. This paper first aims to fill this gap by providing two new benchmarks, PeKA and PK-BETS, on topics such as history, literature, and cultural knowledge, as well as challenging the present state-of-the-art models’ abilities in a variety of Persian language comprehension tasks. These datasets are meant to reduce data contamination while providing an accurate assessment of Persian LLMs. The second aim of this paper is the general evaluation of LLMs across the current Persian benchmarks to provide a comprehensive performance overview. By offering a structured evaluation methodology, we hope to promote the examination of LLMs in the Persian language.</abstract>
<identifier type="citekey">hosseinbeigi-etal-2025-advancing</identifier>
<identifier type="doi">10.18653/v1/2025.findings-naacl.147</identifier>
<location>
<url>https://aclanthology.org/2025.findings-naacl.147/</url>
</location>
<part>
<date>2025-04</date>
<extent unit="page">
<start>2711</start>
<end>2727</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Advancing Persian LLM Evaluation
%A Hosseinbeigi, Sara Bourbour
%A Rohani, Behnam
%A Masoudi, Mostafa
%A Shamsfard, Mehrnoush
%A Saaberi, Zahra
%A Manesh, Mostafa Karimi
%A Abbasi, Mohammad Amin
%Y Chiruzzo, Luis
%Y Ritter, Alan
%Y Wang, Lu
%S Findings of the Association for Computational Linguistics: NAACL 2025
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-195-7
%F hosseinbeigi-etal-2025-advancing
%X Evaluation of large language models (LLMs) in low-resource languages like Persian has received less attention than in high-resource languages like English. Existing evaluation approaches for Persian LLMs generally lack comprehensive frameworks, limiting their ability to assess models’ performance over a wide range of tasks requiring considerable cultural and contextual knowledge, as well as a deeper understanding of Persian literature and style. This paper first aims to fill this gap by providing two new benchmarks, PeKA and PK-BETS, on topics such as history, literature, and cultural knowledge, as well as challenging the present state-of-the-art models’ abilities in a variety of Persian language comprehension tasks. These datasets are meant to reduce data contamination while providing an accurate assessment of Persian LLMs. The second aim of this paper is the general evaluation of LLMs across the current Persian benchmarks to provide a comprehensive performance overview. By offering a structured evaluation methodology, we hope to promote the examination of LLMs in the Persian language.
%R 10.18653/v1/2025.findings-naacl.147
%U https://aclanthology.org/2025.findings-naacl.147/
%U https://doi.org/10.18653/v1/2025.findings-naacl.147
%P 2711-2727
Markdown (Informal)
[Advancing Persian LLM Evaluation](https://aclanthology.org/2025.findings-naacl.147/) (Hosseinbeigi et al., Findings 2025)
ACL
- Sara Bourbour Hosseinbeigi, Behnam Rohani, Mostafa Masoudi, Mehrnoush Shamsfard, Zahra Saaberi, Mostafa Karimi Manesh, and Mohammad Amin Abbasi. 2025. Advancing Persian LLM Evaluation. In Findings of the Association for Computational Linguistics: NAACL 2025, pages 2711–2727, Albuquerque, New Mexico. Association for Computational Linguistics.