@inproceedings{golchin-etal-2025-grading,
title = "Grading Massive Open Online Courses Using Large Language Models",
author = "Golchin, Shahriar and
Garuda, Nikhil and
Impey, Christopher and
Wenger, Matthew",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Eugenio, Barbara Di and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-main.263/",
pages = "3899--3912",
abstract = "Massive open online courses (MOOCs) offer free education globally. Despite this democratization of learning, the massive enrollment in these courses makes it impractical for an instructor to assess every student`s writing assignment. As a result, peer grading, often guided by a straightforward rubric, is the method of choice. While convenient, peer grading often falls short in terms of reliability and validity. In this study, we explore the feasibility of using large language models (LLMs) to replace peer grading in MOOCs. To this end, we adapt the zero-shot chain-of-thought (ZCoT) prompting technique to automate the feedback process once the LLM assigns a score to an assignment. Specifically, to instruct LLMs for grading, we use three distinct prompts based on ZCoT: (1) ZCoT with instructor-provided correct answers, (2) ZCoT with both instructor-provided correct answers and rubrics, and (3) ZCoT with instructor-provided correct answers and LLM-generated rubrics. We tested these prompts in 18 different scenarios using two LLMs{---}GPT-4 and GPT-3.5{---}across three MOOCs: Introductory Astronomy, Astrobiology, and the History and Philosophy of Astronomy. Our results show that ZCoT, when augmented with instructor-provided correct answers and rubrics, produces grades that are more aligned with those assigned by instructors compared to peer grading. Finally, our findings indicate a promising potential for automated grading systems in MOOCs, especially in subjects with well-defined rubrics, to improve the learning experience for millions of online learners worldwide."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="golchin-etal-2025-grading">
<titleInfo>
<title>Grading Massive Open Online Courses Using Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shahriar</namePart>
<namePart type="family">Golchin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikhil</namePart>
<namePart type="family">Garuda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Impey</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Wenger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="given">Di</namePart>
<namePart type="family">Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Massive open online courses (MOOCs) offer free education globally. Despite this democratization of learning, the massive enrollment in these courses makes it impractical for an instructor to assess every student‘s writing assignment. As a result, peer grading, often guided by a straightforward rubric, is the method of choice. While convenient, peer grading often falls short in terms of reliability and validity. In this study, we explore the feasibility of using large language models (LLMs) to replace peer grading in MOOCs. To this end, we adapt the zero-shot chain-of-thought (ZCoT) prompting technique to automate the feedback process once the LLM assigns a score to an assignment. Specifically, to instruct LLMs for grading, we use three distinct prompts based on ZCoT: (1) ZCoT with instructor-provided correct answers, (2) ZCoT with both instructor-provided correct answers and rubrics, and (3) ZCoT with instructor-provided correct answers and LLM-generated rubrics. We tested these prompts in 18 different scenarios using two LLMs—GPT-4 and GPT-3.5—across three MOOCs: Introductory Astronomy, Astrobiology, and the History and Philosophy of Astronomy. Our results show that ZCoT, when augmented with instructor-provided correct answers and rubrics, produces grades that are more aligned with those assigned by instructors compared to peer grading. Finally, our findings indicate a promising potential for automated grading systems in MOOCs, especially in subjects with well-defined rubrics, to improve the learning experience for millions of online learners worldwide.</abstract>
<identifier type="citekey">golchin-etal-2025-grading</identifier>
<location>
<url>https://aclanthology.org/2025.coling-main.263/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>3899</start>
<end>3912</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Grading Massive Open Online Courses Using Large Language Models
%A Golchin, Shahriar
%A Garuda, Nikhil
%A Impey, Christopher
%A Wenger, Matthew
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Eugenio, Barbara Di
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F golchin-etal-2025-grading
%X Massive open online courses (MOOCs) offer free education globally. Despite this democratization of learning, the massive enrollment in these courses makes it impractical for an instructor to assess every student‘s writing assignment. As a result, peer grading, often guided by a straightforward rubric, is the method of choice. While convenient, peer grading often falls short in terms of reliability and validity. In this study, we explore the feasibility of using large language models (LLMs) to replace peer grading in MOOCs. To this end, we adapt the zero-shot chain-of-thought (ZCoT) prompting technique to automate the feedback process once the LLM assigns a score to an assignment. Specifically, to instruct LLMs for grading, we use three distinct prompts based on ZCoT: (1) ZCoT with instructor-provided correct answers, (2) ZCoT with both instructor-provided correct answers and rubrics, and (3) ZCoT with instructor-provided correct answers and LLM-generated rubrics. We tested these prompts in 18 different scenarios using two LLMs—GPT-4 and GPT-3.5—across three MOOCs: Introductory Astronomy, Astrobiology, and the History and Philosophy of Astronomy. Our results show that ZCoT, when augmented with instructor-provided correct answers and rubrics, produces grades that are more aligned with those assigned by instructors compared to peer grading. Finally, our findings indicate a promising potential for automated grading systems in MOOCs, especially in subjects with well-defined rubrics, to improve the learning experience for millions of online learners worldwide.
%U https://aclanthology.org/2025.coling-main.263/
%P 3899-3912
Markdown (Informal)
[Grading Massive Open Online Courses Using Large Language Models](https://aclanthology.org/2025.coling-main.263/) (Golchin et al., COLING 2025)
ACL