@inproceedings{zeinalipour-etal-2025-persianmcq,
title = "{P}ersian{MCQ}-Instruct: A Comprehensive Resource for Generating Multiple-Choice Questions in {P}ersian",
author = "Zeinalipour, Kamyar and
Jamshidi, Neda and
Akbari, Fahimeh and
Maggini, Marco and
Bianchini, Monica and
Gori, Marco",
editor = "Hettiarachchi, Hansi and
Ranasinghe, Tharindu and
Rayson, Paul and
Mitkov, Ruslan and
Gaber, Mohamed and
Premasiri, Damith and
Tan, Fiona Anting and
Uyangodage, Lasitha",
booktitle = "Proceedings of the First Workshop on Language Models for Low-Resource Languages",
month = jan,
year = "2025",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.loreslm-1.27/",
pages = "344--372",
abstract = "We present PersianMCQ-Instruct, a comprehensive resource that includes a dataset and advanced models for generating multiple-choice questions (MCQs) in standard Iranian Persian, a low-resource language spoken by over 80 million people. This resource features three state-of-the-art models for Persian MCQ generation: PMCQ-Gemma2-9b, PMCQ-Llama3.1-8b, and PMCQ-Mistral-7B. Inspired by the Agent Instruct framework and GPT-4o, we created the dataset by curating over 4,000 unique Persian Wikipedia pages, resulting in three MCQs per page and a total of over 12,000 questions. To ensure the quality of this dataset, we conducted human evaluations and model fine-tuning, both of which demonstrated significant performance improvements in Persian MCQ generation. The dataset and models are publicly available, offering valuable tools for researchers and educators, with particular benefits for advancing Persian-language educational technology."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zeinalipour-etal-2025-persianmcq">
<titleInfo>
<title>PersianMCQ-Instruct: A Comprehensive Resource for Generating Multiple-Choice Questions in Persian</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kamyar</namePart>
<namePart type="family">Zeinalipour</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Neda</namePart>
<namePart type="family">Jamshidi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fahimeh</namePart>
<namePart type="family">Akbari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Maggini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Monica</namePart>
<namePart type="family">Bianchini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marco</namePart>
<namePart type="family">Gori</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Language Models for Low-Resource Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hansi</namePart>
<namePart type="family">Hettiarachchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tharindu</namePart>
<namePart type="family">Ranasinghe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Rayson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Gaber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Damith</namePart>
<namePart type="family">Premasiri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fiona</namePart>
<namePart type="given">Anting</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lasitha</namePart>
<namePart type="family">Uyangodage</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present PersianMCQ-Instruct, a comprehensive resource that includes a dataset and advanced models for generating multiple-choice questions (MCQs) in standard Iranian Persian, a low-resource language spoken by over 80 million people. This resource features three state-of-the-art models for Persian MCQ generation: PMCQ-Gemma2-9b, PMCQ-Llama3.1-8b, and PMCQ-Mistral-7B. Inspired by the Agent Instruct framework and GPT-4o, we created the dataset by curating over 4,000 unique Persian Wikipedia pages, resulting in three MCQs per page and a total of over 12,000 questions. To ensure the quality of this dataset, we conducted human evaluations and model fine-tuning, both of which demonstrated significant performance improvements in Persian MCQ generation. The dataset and models are publicly available, offering valuable tools for researchers and educators, with particular benefits for advancing Persian-language educational technology.</abstract>
<identifier type="citekey">zeinalipour-etal-2025-persianmcq</identifier>
<location>
<url>https://aclanthology.org/2025.loreslm-1.27/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>344</start>
<end>372</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T PersianMCQ-Instruct: A Comprehensive Resource for Generating Multiple-Choice Questions in Persian
%A Zeinalipour, Kamyar
%A Jamshidi, Neda
%A Akbari, Fahimeh
%A Maggini, Marco
%A Bianchini, Monica
%A Gori, Marco
%Y Hettiarachchi, Hansi
%Y Ranasinghe, Tharindu
%Y Rayson, Paul
%Y Mitkov, Ruslan
%Y Gaber, Mohamed
%Y Premasiri, Damith
%Y Tan, Fiona Anting
%Y Uyangodage, Lasitha
%S Proceedings of the First Workshop on Language Models for Low-Resource Languages
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F zeinalipour-etal-2025-persianmcq
%X We present PersianMCQ-Instruct, a comprehensive resource that includes a dataset and advanced models for generating multiple-choice questions (MCQs) in standard Iranian Persian, a low-resource language spoken by over 80 million people. This resource features three state-of-the-art models for Persian MCQ generation: PMCQ-Gemma2-9b, PMCQ-Llama3.1-8b, and PMCQ-Mistral-7B. Inspired by the Agent Instruct framework and GPT-4o, we created the dataset by curating over 4,000 unique Persian Wikipedia pages, resulting in three MCQs per page and a total of over 12,000 questions. To ensure the quality of this dataset, we conducted human evaluations and model fine-tuning, both of which demonstrated significant performance improvements in Persian MCQ generation. The dataset and models are publicly available, offering valuable tools for researchers and educators, with particular benefits for advancing Persian-language educational technology.
%U https://aclanthology.org/2025.loreslm-1.27/
%P 344-372
Markdown (Informal)
[PersianMCQ-Instruct: A Comprehensive Resource for Generating Multiple-Choice Questions in Persian](https://aclanthology.org/2025.loreslm-1.27/) (Zeinalipour et al., LoResLM 2025)
ACL