@inproceedings{yu-etal-2024-automating-true,
title = "Automating True-False Multiple-Choice Question Generation and Evaluation with Retrieval-based Accuracy Differential",
author = "Yu, Chen-Jui and
Lee, Wen Hung and
Ke, Lin Tse and
Guo, Shih-Wei and
Fan, Yao-Chung",
editor = "Mahamood, Saad and
Minh, Nguyen Le and
Ippolito, Daphne",
booktitle = "Proceedings of the 17th International Natural Language Generation Conference",
month = sep,
year = "2024",
address = "Tokyo, Japan",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.inlg-main.16",
pages = "198--212",
abstract = "Creating high-quality True-False (TF) multiple-choice questions (MCQs), with accurate distractors, is a challenging and time-consuming task in education. This paper introduces True-False Distractor Generation (TFDG), a pipeline that leverages pre-trained language models and sentence retrieval techniques to automate the generation of TF-type MCQ distractors. Furthermore, the evaluation of generated TF questions presents a challenge. Traditional metrics like BLEU and ROUGE are unsuitable for this task. To address this, we propose a new evaluation metric called Retrieval-based Accuracy Differential (RAD). RAD assesses the discriminative power of TF questions by comparing model accuracy with and without access to reference texts. It quantitatively evaluates how well questions differentiate between students with varying knowledge levels. This research benefits educators and assessment developers, facilitating the efficient automatic generation of high-quality TF-type MCQs and their reliable evaluation.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yu-etal-2024-automating-true">
<titleInfo>
<title>Automating True-False Multiple-Choice Question Generation and Evaluation with Retrieval-based Accuracy Differential</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chen-Jui</namePart>
<namePart type="family">Yu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wen</namePart>
<namePart type="given">Hung</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lin</namePart>
<namePart type="given">Tse</namePart>
<namePart type="family">Ke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shih-Wei</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yao-Chung</namePart>
<namePart type="family">Fan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Natural Language Generation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saad</namePart>
<namePart type="family">Mahamood</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nguyen</namePart>
<namePart type="given">Le</namePart>
<namePart type="family">Minh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daphne</namePart>
<namePart type="family">Ippolito</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Tokyo, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Creating high-quality True-False (TF) multiple-choice questions (MCQs), with accurate distractors, is a challenging and time-consuming task in education. This paper introduces True-False Distractor Generation (TFDG), a pipeline that leverages pre-trained language models and sentence retrieval techniques to automate the generation of TF-type MCQ distractors. Furthermore, the evaluation of generated TF questions presents a challenge. Traditional metrics like BLEU and ROUGE are unsuitable for this task. To address this, we propose a new evaluation metric called Retrieval-based Accuracy Differential (RAD). RAD assesses the discriminative power of TF questions by comparing model accuracy with and without access to reference texts. It quantitatively evaluates how well questions differentiate between students with varying knowledge levels. This research benefits educators and assessment developers, facilitating the efficient automatic generation of high-quality TF-type MCQs and their reliable evaluation.</abstract>
<identifier type="citekey">yu-etal-2024-automating-true</identifier>
<location>
<url>https://aclanthology.org/2024.inlg-main.16</url>
</location>
<part>
<date>2024-09</date>
<extent unit="page">
<start>198</start>
<end>212</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automating True-False Multiple-Choice Question Generation and Evaluation with Retrieval-based Accuracy Differential
%A Yu, Chen-Jui
%A Lee, Wen Hung
%A Ke, Lin Tse
%A Guo, Shih-Wei
%A Fan, Yao-Chung
%Y Mahamood, Saad
%Y Minh, Nguyen Le
%Y Ippolito, Daphne
%S Proceedings of the 17th International Natural Language Generation Conference
%D 2024
%8 September
%I Association for Computational Linguistics
%C Tokyo, Japan
%F yu-etal-2024-automating-true
%X Creating high-quality True-False (TF) multiple-choice questions (MCQs), with accurate distractors, is a challenging and time-consuming task in education. This paper introduces True-False Distractor Generation (TFDG), a pipeline that leverages pre-trained language models and sentence retrieval techniques to automate the generation of TF-type MCQ distractors. Furthermore, the evaluation of generated TF questions presents a challenge. Traditional metrics like BLEU and ROUGE are unsuitable for this task. To address this, we propose a new evaluation metric called Retrieval-based Accuracy Differential (RAD). RAD assesses the discriminative power of TF questions by comparing model accuracy with and without access to reference texts. It quantitatively evaluates how well questions differentiate between students with varying knowledge levels. This research benefits educators and assessment developers, facilitating the efficient automatic generation of high-quality TF-type MCQs and their reliable evaluation.
%U https://aclanthology.org/2024.inlg-main.16
%P 198-212
Markdown (Informal)
[Automating True-False Multiple-Choice Question Generation and Evaluation with Retrieval-based Accuracy Differential](https://aclanthology.org/2024.inlg-main.16) (Yu et al., INLG 2024)
ACL