% Workshop paper record; canonical landing page is given in the url field.
% Note: the apostrophe in the abstract ("student's") must be a plain ', not a
% backtick, otherwise LaTeX typesets it as an opening single quote.
@inproceedings{boonsarngsuk-etal-2025-evaluating,
    title     = {Evaluating Sampling Strategies for Similarity-Based Short Answer Scoring: a Case Study in {Thailand}},
    author    = {Boonsarngsuk, Pachara and
                 Arpanantikul, Pacharapon and
                 Hiranwipas, Supakorn and
                 Watcharakajorn, Wipu and
                 Chuangsuwanich, Ekapol},
    editor    = {Wijaya, Derry and
                 Aji, Alham Fikri and
                 Vania, Clara and
                 Winata, Genta Indra and
                 Purwarianti, Ayu},
    booktitle = {Proceedings of the Second Workshop in South East Asian Language Processing},
    month     = jan,
    year      = {2025},
    address   = {Online},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.sealp-1.3/},
    pages     = {27--41},
    abstract  = {Automatic short answer scoring is a task whose aim is to help grade written works by learners of some subject matter. In niche subject domains with small examples, existing methods primarily utilized similarity-based scoring, relying on predefined reference answers to grade each student's answer based on the similarity to the reference. However, these reference answers are often generated from a randomly selected set of graded student answer, which may fail to represent the full range of scoring variations. We propose a semi-automatic scoring framework that enhances the selective sampling strategy for defining the reference answers through a K-center-based and a K-means-based sampling method. Our results demonstrate that our framework outperforms previous similarity-based scoring methods on a dataset with Thai and English. Moreover, it achieves competitive performance compared to human reference performance and LLMs.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!--
  MODS v3 record for the paper with citekey boonsarngsuk-etal-2025-evaluating.
  Structure: the top-level <name> elements are the paper's authors; the
  <relatedItem type="host"> element describes the containing proceedings
  (its title, editors, publisher and place). Page extent and issue date of
  the paper itself are under <part>.
-->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="boonsarngsuk-etal-2025-evaluating">
    <titleInfo>
      <title>Evaluating Sampling Strategies for Similarity-Based Short Answer Scoring: a Case Study in Thailand</title>
    </titleInfo>
    <!-- Authors, in publication order -->
    <name type="personal">
      <namePart type="given">Pachara</namePart>
      <namePart type="family">Boonsarngsuk</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pacharapon</namePart>
      <namePart type="family">Arpanantikul</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Supakorn</namePart>
      <namePart type="family">Hiranwipas</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Wipu</namePart>
      <namePart type="family">Watcharakajorn</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ekapol</namePart>
      <namePart type="family">Chuangsuwanich</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-01</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host publication: the proceedings volume and its editors -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Second Workshop in South East Asian Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Derry</namePart>
        <namePart type="family">Wijaya</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alham</namePart>
        <namePart type="given">Fikri</namePart>
        <namePart type="family">Aji</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Clara</namePart>
        <namePart type="family">Vania</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Genta</namePart>
        <namePart type="given">Indra</namePart>
        <namePart type="family">Winata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ayu</namePart>
        <namePart type="family">Purwarianti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Automatic short answer scoring is a task whose aim is to help grade written works by learners of some subject matter. In niche subject domains with small examples, existing methods primarily utilized similarity-based scoring, relying on predefined reference answers to grade each student's answer based on the similarity to the reference. However, these reference answers are often generated from a randomly selected set of graded student answer, which may fail to represent the full range of scoring variations. We propose a semi-automatic scoring framework that enhances the selective sampling strategy for defining the reference answers through a K-center-based and a K-means-based sampling method. Our results demonstrate that our framework outperforms previous similarity-based scoring methods on a dataset with Thai and English. Moreover, it achieves competitive performance compared to human reference performance and LLMs.</abstract>
    <identifier type="citekey">boonsarngsuk-etal-2025-evaluating</identifier>
    <location>
      <url>https://aclanthology.org/2025.sealp-1.3/</url>
    </location>
    <part>
      <date>2025-01</date>
      <extent unit="page">
        <start>27</start>
        <end>41</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluating Sampling Strategies for Similarity-Based Short Answer Scoring: a Case Study in Thailand
%A Boonsarngsuk, Pachara
%A Arpanantikul, Pacharapon
%A Hiranwipas, Supakorn
%A Watcharakajorn, Wipu
%A Chuangsuwanich, Ekapol
%Y Wijaya, Derry
%Y Aji, Alham Fikri
%Y Vania, Clara
%Y Winata, Genta Indra
%Y Purwarianti, Ayu
%S Proceedings of the Second Workshop in South East Asian Language Processing
%D 2025
%8 January
%I Association for Computational Linguistics
%C Online
%F boonsarngsuk-etal-2025-evaluating
%X Automatic short answer scoring is a task whose aim is to help grade written works by learners of some subject matter. In niche subject domains with small examples, existing methods primarily utilized similarity-based scoring, relying on predefined reference answers to grade each student's answer based on the similarity to the reference. However, these reference answers are often generated from a randomly selected set of graded student answer, which may fail to represent the full range of scoring variations. We propose a semi-automatic scoring framework that enhances the selective sampling strategy for defining the reference answers through a K-center-based and a K-means-based sampling method. Our results demonstrate that our framework outperforms previous similarity-based scoring methods on a dataset with Thai and English. Moreover, it achieves competitive performance compared to human reference performance and LLMs.
%U https://aclanthology.org/2025.sealp-1.3/
%P 27-41
Markdown (Informal)
[Evaluating Sampling Strategies for Similarity-Based Short Answer Scoring: a Case Study in Thailand](https://aclanthology.org/2025.sealp-1.3/) (Boonsarngsuk et al., sealp 2025)
ACL