@inproceedings{peters-etal-2025-review,
    title     = {Review of Text-Based Approaches to Item Difficulty Modeling in Large-Scale Assessments},
    author    = {Peters, Sydney and
                 Zhang, Nan and
                 Jiao, Hong and
                 Li, Ming and
                 Zhou, Tianyi},
    editor    = {Wilson, Joshua and
                 Ormerod, Christopher and
                 Beiting Parrish, Magdalen},
    booktitle = {Proceedings of the Artificial Intelligence in Measurement and Education Conference (AIME-Con): Coordinated Session Papers},
    month     = oct,
    year      = {2025},
    address   = {Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States},
    publisher = {National Council on Measurement in Education (NCME)},
    url       = {https://aclanthology.org/2025.aimecon-sessions.4/},
    pages     = {37--47},
    isbn      = {979-8-218-84230-7},
    abstract  = {Item difficulty plays a crucial role in evaluating item quality, test form assembly, and interpretation of scores in large-scale assessments. Traditional approaches to estimate item difficulty rely on item response data collected in field testing, which can be time-consuming and costly. To overcome these challenges, text-based approaches leveraging machine learning and natural language processing have emerged as promising alternatives. This paper reviews and synthesizes 37 articles on automated item difficulty prediction in large-scale assessments. Each study is synthesized in terms of the dataset, difficulty parameter, subject domain, item type, number of items, training and test data split, input, features, model, evaluation criteria, and model performance outcomes. Overall, text-based models achieved moderate to high predictive performance, highlighting the potential of text-based item difficulty modeling to enhance the current practices of item quality evaluation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record mirroring the BibTeX entry above (citekey: peters-etal-2025-review).
     Contains the same title, five authors, three editors, host proceedings,
     publisher/place, ISBN, abstract, URL, and page range (37-47); dateIssued
     2025-10 corresponds to year=2025 / month=oct in the BibTeX entry. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="peters-etal-2025-review">
<titleInfo>
<title>Review of Text-Based Approaches to Item Difficulty Modeling in Large-Scale Assessments</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sydney</namePart>
<namePart type="family">Peters</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nan</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hong</namePart>
<namePart type="family">Jiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ming</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tianyi</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the conference proceedings this paper appears in. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Artificial Intelligence in Measurement and Education Conference (AIME-Con): Coordinated Session Papers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Joshua</namePart>
<namePart type="family">Wilson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Ormerod</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Magdalen</namePart>
<namePart type="family">Beiting Parrish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>National Council on Measurement in Education (NCME)</publisher>
<place>
<placeTerm type="text">Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-218-84230-7</identifier>
</relatedItem>
<abstract>Item difficulty plays a crucial role in evaluating item quality, test form assembly, and interpretation of scores in large-scale assessments. Traditional approaches to estimate item difficulty rely on item response data collected in field testing, which can be time-consuming and costly. To overcome these challenges, text-based approaches leveraging machine learning and natural language processing have emerged as promising alternatives. This paper reviews and synthesizes 37 articles on automated item difficulty prediction in large-scale assessments. Each study is synthesized in terms of the dataset, difficulty parameter, subject domain, item type, number of items, training and test data split, input, features, model, evaluation criteria, and model performance outcomes. Overall, text-based models achieved moderate to high predictive performance, highlighting the potential of text-based item difficulty modeling to enhance the current practices of item quality evaluation.</abstract>
<identifier type="citekey">peters-etal-2025-review</identifier>
<location>
<url>https://aclanthology.org/2025.aimecon-sessions.4/</url>
</location>
<part>
<date>2025-10</date>
<extent unit="page">
<start>37</start>
<end>47</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Review of Text-Based Approaches to Item Difficulty Modeling in Large-Scale Assessments
%A Peters, Sydney
%A Zhang, Nan
%A Jiao, Hong
%A Li, Ming
%A Zhou, Tianyi
%Y Wilson, Joshua
%Y Ormerod, Christopher
%Y Beiting Parrish, Magdalen
%S Proceedings of the Artificial Intelligence in Measurement and Education Conference (AIME-Con): Coordinated Session Papers
%D 2025
%8 October
%I National Council on Measurement in Education (NCME)
%C Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States
%@ 979-8-218-84230-7
%F peters-etal-2025-review
%X Item difficulty plays a crucial role in evaluating item quality, test form assembly, and interpretation of scores in large-scale assessments. Traditional approaches to estimate item difficulty rely on item response data collected in field testing, which can be time-consuming and costly. To overcome these challenges, text-based approaches leveraging machine learning and natural language processing have emerged as promising alternatives. This paper reviews and synthesizes 37 articles on automated item difficulty prediction in large-scale assessments. Each study is synthesized in terms of the dataset, difficulty parameter, subject domain, item type, number of items, training and test data split, input, features, model, evaluation criteria, and model performance outcomes. Overall, text-based models achieved moderate to high predictive performance, highlighting the potential of text-based item difficulty modeling to enhance the current practices of item quality evaluation.
%U https://aclanthology.org/2025.aimecon-sessions.4/
%P 37-47
Markdown (Informal)
[Review of Text-Based Approaches to Item Difficulty Modeling in Large-Scale Assessments](https://aclanthology.org/2025.aimecon-sessions.4/) (Peters et al., AIME-Con 2025)
ACL
- Sydney Peters, Nan Zhang, Hong Jiao, Ming Li, and Tianyi Zhou. 2025. Review of Text-Based Approaches to Item Difficulty Modeling in Large-Scale Assessments. In Proceedings of the Artificial Intelligence in Measurement and Education Conference (AIME-Con): Coordinated Session Papers, pages 37–47, Wyndham Grand Pittsburgh, Downtown, Pittsburgh, Pennsylvania, United States. National Council on Measurement in Education (NCME).