@inproceedings{frederick-eneye-etal-2025-advances,
  title     = {Advances in Auto-Grading with Large Language Models: A Cross-Disciplinary Survey},
  author    = {Frederick Eneye, Tania Amanda Nkoyo and
               Ijezue, Chukwuebuka Fortunate and
               Imam Amjad, Ahmad and
               Amjad, Maaz and
               Butt, Sabur and
               Casta{\~n}eda-Garza, Gerardo},
  editor    = {Kochmar, Ekaterina and
               Alhafni, Bashar and
               Bexte, Marie and
               Burstein, Jill and
               Horbach, Andrea and
               Laarmann-Quante, Ronja and
               Tack, Ana{\"i}s and
               Yaneva, Victoria and
               Yuan, Zheng},
  booktitle = {Proceedings of the 20th Workshop on Innovative Use of {NLP} for Building Educational Applications ({BEA} 2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.bea-1.35/},
  doi       = {10.18653/v1/2025.bea-1.35},
  pages     = {477--498},
  isbn      = {979-8-89176-270-1},
  abstract  = {With the rise and widespread adoption of Large Language Models (LLMs) in recent years, extensive research has been conducted on their applications across various domains. One such domain is education, where a key area of interest for researchers is investigating the implementation and reliability of LLMs in grading student responses. This review paper examines studies on the use of LLMs in grading across six academic sub-fields: educational assessment, essay grading, natural sciences and technology, social sciences and humanities, computer science and engineering, and mathematics. It explores how different LLMs are applied in automated grading, the prompting techniques employed, the effectiveness of LLM-based grading for both structured and open-ended responses, and the patterns observed in grading performance. Additionally, this paper discusses the challenges associated with LLM-based grading systems, such as inconsistencies and the need for human oversight. By synthesizing existing research, this paper provides insights into the current capabilities of LLMs in academic assessment and serves as a foundation for future exploration in this area.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="frederick-eneye-etal-2025-advances">
<titleInfo>
<title>Advances in Auto-Grading with Large Language Models: A Cross-Disciplinary Survey</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tania</namePart>
<namePart type="given">Amanda</namePart>
<namePart type="given">Nkoyo</namePart>
<namePart type="family">Frederick Eneye</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chukwuebuka</namePart>
<namePart type="given">Fortunate</namePart>
<namePart type="family">Ijezue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmad</namePart>
<namePart type="family">Imam Amjad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maaz</namePart>
<namePart type="family">Amjad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sabur</namePart>
<namePart type="family">Butt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerardo</namePart>
<namePart type="family">Castañeda-Garza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Kochmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bashar</namePart>
<namePart type="family">Alhafni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie</namePart>
<namePart type="family">Bexte</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jill</namePart>
<namePart type="family">Burstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrea</namePart>
<namePart type="family">Horbach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ronja</namePart>
<namePart type="family">Laarmann-Quante</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anaïs</namePart>
<namePart type="family">Tack</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Victoria</namePart>
<namePart type="family">Yaneva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Yuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-270-1</identifier>
</relatedItem>
<abstract>With the rise and widespread adoption of Large Language Models (LLMs) in recent years, extensive research has been conducted on their applications across various domains. One such domain is education, where a key area of interest for researchers is investigating the implementation and reliability of LLMs in grading student responses. This review paper examines studies on the use of LLMs in grading across six academic sub-fields: educational assessment, essay grading, natural sciences and technology, social sciences and humanities, computer science and engineering, and mathematics. It explores how different LLMs are applied in automated grading, the prompting techniques employed, the effectiveness of LLM-based grading for both structured and open-ended responses, and the patterns observed in grading performance. Additionally, this paper discusses the challenges associated with LLM-based grading systems, such as inconsistencies and the need for human oversight. By synthesizing existing research, this paper provides insights into the current capabilities of LLMs in academic assessment and serves as a foundation for future exploration in this area.</abstract>
<identifier type="citekey">frederick-eneye-etal-2025-advances</identifier>
<identifier type="doi">10.18653/v1/2025.bea-1.35</identifier>
<location>
<url>https://aclanthology.org/2025.bea-1.35/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>477</start>
<end>498</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Advances in Auto-Grading with Large Language Models: A Cross-Disciplinary Survey
%A Frederick Eneye, Tania Amanda Nkoyo
%A Ijezue, Chukwuebuka Fortunate
%A Imam Amjad, Ahmad
%A Amjad, Maaz
%A Butt, Sabur
%A Castañeda-Garza, Gerardo
%Y Kochmar, Ekaterina
%Y Alhafni, Bashar
%Y Bexte, Marie
%Y Burstein, Jill
%Y Horbach, Andrea
%Y Laarmann-Quante, Ronja
%Y Tack, Anaïs
%Y Yaneva, Victoria
%Y Yuan, Zheng
%S Proceedings of the 20th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-270-1
%F frederick-eneye-etal-2025-advances
%X With the rise and widespread adoption of Large Language Models (LLMs) in recent years, extensive research has been conducted on their applications across various domains. One such domain is education, where a key area of interest for researchers is investigating the implementation and reliability of LLMs in grading student responses. This review paper examines studies on the use of LLMs in grading across six academic sub-fields: educational assessment, essay grading, natural sciences and technology, social sciences and humanities, computer science and engineering, and mathematics. It explores how different LLMs are applied in automated grading, the prompting techniques employed, the effectiveness of LLM-based grading for both structured and open-ended responses, and the patterns observed in grading performance. Additionally, this paper discusses the challenges associated with LLM-based grading systems, such as inconsistencies and the need for human oversight. By synthesizing existing research, this paper provides insights into the current capabilities of LLMs in academic assessment and serves as a foundation for future exploration in this area.
%R 10.18653/v1/2025.bea-1.35
%U https://aclanthology.org/2025.bea-1.35/
%U https://doi.org/10.18653/v1/2025.bea-1.35
%P 477-498
Markdown (Informal)
[Advances in Auto-Grading with Large Language Models: A Cross-Disciplinary Survey](https://aclanthology.org/2025.bea-1.35/) (Frederick Eneye et al., BEA 2025)
ACL
- Tania Amanda Nkoyo Frederick Eneye, Chukwuebuka Fortunate Ijezue, Ahmad Imam Amjad, Maaz Amjad, Sabur Butt, and Gerardo Castañeda-Garza. 2025. Advances in Auto-Grading with Large Language Models: A Cross-Disciplinary Survey. In Proceedings of the 20th Workshop on Innovative Use of NLP for Building Educational Applications (BEA 2025), pages 477–498, Vienna, Austria. Association for Computational Linguistics.