@inproceedings{rittle-johnson-etal-2025-detecting,
title = "Detecting Math Misconceptions: An {AI} Benchmark Dataset",
author = "Rittle-Johnson, Bethany and
Adler, Rebecca and
Durkin, Kelley and
Burleigh, L and
King, Jules and
Crossley, Scott",
editor = "Wilson, Joshua and
Ormerod, Christopher and
Beiting Parrish, Magdalen",
booktitle = "Proceedings of the Artificial Intelligence in Measurement and Education Conference (AIME-Con): Works in Progress",
month = oct,
year = "2025",
address = "Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States",
publisher = "National Council on Measurement in Education (NCME)",
url = "https://aclanthology.org/2025.aimecon-wip.3/",
pages = "20--24",
ISBN = "979-8-218-84229-1",
abstract = "To harness the promise of AI for improving math education, AI models need to be able to diagnose math misconceptions. We created an AI benchmark dataset on math misconceptions and other instructionally-relevant errors, comprising over 52,000 explanations written over 15 math questions that were scored by expert human raters."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rittle-johnson-etal-2025-detecting">
<titleInfo>
<title>Detecting Math Misconceptions: An AI Benchmark Dataset</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bethany</namePart>
<namePart type="family">Rittle-Johnson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rebecca</namePart>
<namePart type="family">Adler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kelley</namePart>
<namePart type="family">Durkin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">L</namePart>
<namePart type="family">Burleigh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jules</namePart>
<namePart type="family">King</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Scott</namePart>
<namePart type="family">Crossley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Artificial Intelligence in Measurement and Education Conference (AIME-Con): Works in Progress</title>
</titleInfo>
<name type="personal">
<namePart type="given">Joshua</namePart>
<namePart type="family">Wilson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Ormerod</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Magdalen</namePart>
<namePart type="family">Beiting Parrish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>National Council on Measurement in Education (NCME)</publisher>
<place>
<placeTerm type="text">Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-218-84229-1</identifier>
</relatedItem>
<abstract>To harness the promise of AI for improving math education, AI models need to be able to diagnose math misconceptions. We created an AI benchmark dataset on math misconceptions and other instructionally-relevant errors, comprising over 52,000 explanations written over 15 math questions that were scored by expert human raters.</abstract>
<identifier type="citekey">rittle-johnson-etal-2025-detecting</identifier>
<location>
<url>https://aclanthology.org/2025.aimecon-wip.3/</url>
</location>
<part>
<date>2025-10</date>
<extent unit="page">
<start>20</start>
<end>24</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Detecting Math Misconceptions: An AI Benchmark Dataset
%A Rittle-Johnson, Bethany
%A Adler, Rebecca
%A Durkin, Kelley
%A Burleigh, L.
%A King, Jules
%A Crossley, Scott
%Y Wilson, Joshua
%Y Ormerod, Christopher
%Y Beiting Parrish, Magdalen
%S Proceedings of the Artificial Intelligence in Measurement and Education Conference (AIME-Con): Works in Progress
%D 2025
%8 October
%I National Council on Measurement in Education (NCME)
%C Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States
%@ 979-8-218-84229-1
%F rittle-johnson-etal-2025-detecting
%X To harness the promise of AI for improving math education, AI models need to be able to diagnose math misconceptions. We created an AI benchmark dataset on math misconceptions and other instructionally-relevant errors, comprising over 52,000 explanations written over 15 math questions that were scored by expert human raters.
%U https://aclanthology.org/2025.aimecon-wip.3/
%P 20-24
Markdown (Informal)
[Detecting Math Misconceptions: An AI Benchmark Dataset](https://aclanthology.org/2025.aimecon-wip.3/) (Rittle-Johnson et al., AIME-Con 2025)
ACL
- Bethany Rittle-Johnson, Rebecca Adler, Kelley Durkin, L Burleigh, Jules King, and Scott Crossley. 2025. Detecting Math Misconceptions: An AI Benchmark Dataset. In Proceedings of the Artificial Intelligence in Measurement and Education Conference (AIME-Con): Works in Progress, pages 20–24, Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States. National Council on Measurement in Education (NCME).