@inproceedings{dumitran-etal-2025-grile,
title = "{GRILE}: A Benchmark for Grammar Reasoning and Explanation in {R}omanian {LLM}s",
author = "Dumitran, Marius and
Dumitran, Angela and
Danila, Alexandra Mihaela",
editor = "Angelova, Galia and
Kunilovskaya, Maria and
Escribe, Marie and
Mitkov, Ruslan",
booktitle = "Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era",
month = sep,
year = "2025",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2025.ranlp-1.39/",
pages = "316--324",
abstract = "Large language models (LLMs) have revolutionised NLP, yet their pedagogical value for low{-}resource languages remains unclear. We present GRILE, the first open benchmark of 1 151 multiple{-}choice questions harvested from Romanian high{-}stakes exams (National Evaluation, Baccalaureate, university admissions). GRILE enables us to probe two complementary abilities of seven state{-}of{-}the{-}art multilingual and Romanian{-}specific LLMs: (i) selecting the correct answer, and (ii) producing linguistically faithful explanations. While Gemini 2{\textperiodcentered}5 Pro reaches 83{\%} accuracy, most open{-}weight models stay below 65{\%}, and 48{\%} of their explanations contain factual or pedagogical flaws according to expert review. A detailed error analysis pinpoints systematic weaknesses in morphology and in applying the latest DOOM 3 orthographic norms. All data, code and a public web demo are released to catalyse future research. Our findings expose open challenges for trustworthy educational NLP in low{-}resource settings and establish GRILE as a new test{-}bed for controllable explanation generation and evaluation."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="dumitran-etal-2025-grile">
    <titleInfo>
      <title>GRILE: A Benchmark for Grammar Reasoning and Explanation in Romanian LLMs</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Marius</namePart>
      <namePart type="family">Dumitran</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Angela</namePart>
      <namePart type="family">Dumitran</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alexandra</namePart>
      <namePart type="given">Mihaela</namePart>
      <namePart type="family">Danila</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Galia</namePart>
        <namePart type="family">Angelova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Kunilovskaya</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marie</namePart>
        <namePart type="family">Escribe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ruslan</namePart>
        <namePart type="family">Mitkov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
        <place>
          <placeTerm type="text">Varna, Bulgaria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Large language models (LLMs) have revolutionised NLP, yet their pedagogical value for low-resource languages remains unclear. We present GRILE, the first open benchmark of 1 151 multiple-choice questions harvested from Romanian high-stakes exams (National Evaluation, Baccalaureate, university admissions). GRILE enables us to probe two complementary abilities of seven state-of-the-art multilingual and Romanian-specific LLMs: (i) selecting the correct answer, and (ii) producing linguistically faithful explanations. While Gemini 2.5 Pro reaches 83% accuracy, most open-weight models stay below 65%, and 48% of their explanations contain factual or pedagogical flaws according to expert review. A detailed error analysis pinpoints systematic weaknesses in morphology and in applying the latest DOOM 3 orthographic norms. All data, code and a public web demo are released to catalyse future research. Our findings expose open challenges for trustworthy educational NLP in low-resource settings and establish GRILE as a new test-bed for controllable explanation generation and evaluation.</abstract>
    <identifier type="citekey">dumitran-etal-2025-grile</identifier>
    <location>
      <url>https://aclanthology.org/2025.ranlp-1.39/</url>
    </location>
    <part>
      <date>2025-09</date>
      <extent unit="page">
        <start>316</start>
        <end>324</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T GRILE: A Benchmark for Grammar Reasoning and Explanation in Romanian LLMs
%A Dumitran, Marius
%A Dumitran, Angela
%A Danila, Alexandra Mihaela
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F dumitran-etal-2025-grile
%X Large language models (LLMs) have revolutionised NLP, yet their pedagogical value for low-resource languages remains unclear. We present GRILE, the first open benchmark of 1 151 multiple-choice questions harvested from Romanian high-stakes exams (National Evaluation, Baccalaureate, university admissions). GRILE enables us to probe two complementary abilities of seven state-of-the-art multilingual and Romanian-specific LLMs: (i) selecting the correct answer, and (ii) producing linguistically faithful explanations. While Gemini 2.5 Pro reaches 83% accuracy, most open-weight models stay below 65%, and 48% of their explanations contain factual or pedagogical flaws according to expert review. A detailed error analysis pinpoints systematic weaknesses in morphology and in applying the latest DOOM 3 orthographic norms. All data, code and a public web demo are released to catalyse future research. Our findings expose open challenges for trustworthy educational NLP in low-resource settings and establish GRILE as a new test-bed for controllable explanation generation and evaluation.
%U https://aclanthology.org/2025.ranlp-1.39/
%P 316-324
Markdown (Informal)
[GRILE: A Benchmark for Grammar Reasoning and Explanation in Romanian LLMs](https://aclanthology.org/2025.ranlp-1.39/) (Dumitran et al., RANLP 2025)

ACL
Marius Dumitran, Angela Dumitran, and Alexandra Mihaela Danila. 2025. GRILE: A Benchmark for Grammar Reasoning and Explanation in Romanian LLMs. In Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era, pages 316–324, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.