% BibTeX record for the MATH-IDN paper (Findings of the ACL: EACL 2026).
@inproceedings{xiao-etal-2026-math,
    title = "{MATH}-{IDN}: A Multilingual Mathematical Problem Solving Dataset Featuring Local Languages in {I}ndonesia",
    author = "Xiao, Xiao and
      Ni'mah, Iftitahu and
      Wabula, Yuyun and
      Pechenizkiy, Mykola and
      Fang, Meng",
    editor = "Demberg, Vera and
      Inui, Kentaro and
      Marquez, Llu{\'i}s",
    booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {EACL} 2026",
    month = mar,
    year = "2026",
    address = "Rabat, Morocco",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.findings-eacl.231/",
    pages = "4432--4438",
    isbn = "979-8-89176-386-9",
    abstract = "Large Language Models (LLMs) excel at mathematical reasoning in English, but their performance in low-resource languages remains underexplored. This gap is particularly critical in the Indonesian context, where equitable access to AI systems depends on robust multilingual reasoning across diverse local languages. We introduce MATH-IDN, a multilingual benchmark for mathematical problem solving in Indonesian, Javanese, Sundanese, and Buginese, with English as a reference, following the MATH dataset. We evaluate multiple open-source LLMs, including math-specialized, Southeast-Asian-adapted, and general-purpose models, under a zero-shot chain-of-thought setting. Results show that MATH-IDN presents a challenging and discriminative benchmark, revealing substantial performance gaps in low-resource languages, particularly Buginese, and highlighting key limitations in current multilingual reasoning capabilities. Our data and code are available at https://github.com/aialt/MATH-IND."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="xiao-etal-2026-math">
<titleInfo>
<title>MATH-IDN: A Multilingual Mathematical Problem Solving Dataset Featuring Local Languages in Indonesia</title>
</titleInfo>
<name type="personal">
<namePart type="given">Xiao</namePart>
<namePart type="family">Xiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iftitahu</namePart>
<namePart type="family">Ni’mah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuyun</namePart>
<namePart type="family">Wabula</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mykola</namePart>
<namePart type="family">Pechenizkiy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Meng</namePart>
<namePart type="family">Fang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lluís</namePart>
<namePart type="family">Marquez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-386-9</identifier>
</relatedItem>
<abstract>Large Language Models (LLMs) excel at mathematical reasoning in English, but their performance in low-resource languages remains underexplored. This gap is particularly critical in the Indonesian context, where equitable access to AI systems depends on robust multilingual reasoning across diverse local languages. We introduce MATH-IDN, a multilingual benchmark for mathematical problem solving in Indonesian, Javanese, Sundanese, and Buginese, with English as a reference, following the MATH dataset. We evaluate multiple open-source LLMs, including math-specialized, Southeast-Asian-adapted, and general-purpose models, under a zero-shot chain-of-thought setting. Results show that MATH-IDN presents a challenging and discriminative benchmark, revealing substantial performance gaps in low-resource languages, particularly Buginese, and highlighting key limitations in current multilingual reasoning capabilities. Our data and code are available at https://github.com/aialt/MATH-IND.</abstract>
<identifier type="citekey">xiao-etal-2026-math</identifier>
<location>
<url>https://aclanthology.org/2026.findings-eacl.231/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>4432</start>
<end>4438</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MATH-IDN: A Multilingual Mathematical Problem Solving Dataset Featuring Local Languages in Indonesia
%A Xiao, Xiao
%A Ni’mah, Iftitahu
%A Wabula, Yuyun
%A Pechenizkiy, Mykola
%A Fang, Meng
%Y Demberg, Vera
%Y Inui, Kentaro
%Y Marquez, Lluís
%S Findings of the Association for Computational Linguistics: EACL 2026
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-386-9
%F xiao-etal-2026-math
%X Large Language Models (LLMs) excel at mathematical reasoning in English, but their performance in low-resource languages remains underexplored. This gap is particularly critical in the Indonesian context, where equitable access to AI systems depends on robust multilingual reasoning across diverse local languages. We introduce MATH-IDN, a multilingual benchmark for mathematical problem solving in Indonesian, Javanese, Sundanese, and Buginese, with English as a reference, following the MATH dataset. We evaluate multiple open-source LLMs, including math-specialized, Southeast-Asian-adapted, and general-purpose models, under a zero-shot chain-of-thought setting. Results show that MATH-IDN presents a challenging and discriminative benchmark, revealing substantial performance gaps in low-resource languages, particularly Buginese, and highlighting key limitations in current multilingual reasoning capabilities. Our data and code are available at https://github.com/aialt/MATH-IND.
%U https://aclanthology.org/2026.findings-eacl.231/
%P 4432-4438
Markdown (Informal)
[MATH-IDN: A Multilingual Mathematical Problem Solving Dataset Featuring Local Languages in Indonesia](https://aclanthology.org/2026.findings-eacl.231/) (Xiao et al., Findings 2026)
ACL