@inproceedings{moon-etal-2024-generative,
title = "Generative Interpretation: Toward Human-Like Evaluation for Educational Question-Answer Pair Generation",
author = "Moon, Hyeonseok and
Lee, Jaewook and
Eo, Sugyeong and
Park, Chanjun and
Seo, Jaehyung and
Lim, Heuiseok",
editor = "Graham, Yvette and
Purver, Matthew",
booktitle = "Findings of the Association for Computational Linguistics: EACL 2024",
month = mar,
year = "2024",
address = "St. Julian{'}s, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-eacl.145",
pages = "2185--2196",
abstract = "Educational question-answer generation has been extensively researched owing to its practical applicability. However, we have identified a persistent challenge concerning the evaluation of such systems. Existing evaluation methods often fail to produce objective results and instead exhibit a bias towards favoring high similarity to the ground-truth question-answer pairs. In this study, we demonstrate that these evaluation methods yield low human alignment and propose an alternative approach called Generative Interpretation (GI) to achieve more objective evaluations. Through experimental analysis, we reveal that GI outperforms existing evaluation methods in terms of human alignment, and even shows comparable performance with GPT3.5, only with BART-large.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="moon-etal-2024-generative">
<titleInfo>
<title>Generative Interpretation: Toward Human-Like Evaluation for Educational Question-Answer Pair Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hyeonseok</namePart>
<namePart type="family">Moon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jaewook</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sugyeong</namePart>
<namePart type="family">Eo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chanjun</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jaehyung</namePart>
<namePart type="family">Seo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heuiseok</namePart>
<namePart type="family">Lim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yvette</namePart>
<namePart type="family">Graham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Purver</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julian’s, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Educational question-answer generation has been extensively researched owing to its practical applicability. However, we have identified a persistent challenge concerning the evaluation of such systems. Existing evaluation methods often fail to produce objective results and instead exhibit a bias towards favoring high similarity to the ground-truth question-answer pairs. In this study, we demonstrate that these evaluation methods yield low human alignment and propose an alternative approach called Generative Interpretation (GI) to achieve more objective evaluations. Through experimental analysis, we reveal that GI outperforms existing evaluation methods in terms of human alignment, and even shows comparable performance with GPT3.5, only with BART-large.</abstract>
<identifier type="citekey">moon-etal-2024-generative</identifier>
<location>
<url>https://aclanthology.org/2024.findings-eacl.145</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>2185</start>
<end>2196</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Generative Interpretation: Toward Human-Like Evaluation for Educational Question-Answer Pair Generation
%A Moon, Hyeonseok
%A Lee, Jaewook
%A Eo, Sugyeong
%A Park, Chanjun
%A Seo, Jaehyung
%A Lim, Heuiseok
%Y Graham, Yvette
%Y Purver, Matthew
%S Findings of the Association for Computational Linguistics: EACL 2024
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F moon-etal-2024-generative
%X Educational question-answer generation has been extensively researched owing to its practical applicability. However, we have identified a persistent challenge concerning the evaluation of such systems. Existing evaluation methods often fail to produce objective results and instead exhibit a bias towards favoring high similarity to the ground-truth question-answer pairs. In this study, we demonstrate that these evaluation methods yield low human alignment and propose an alternative approach called Generative Interpretation (GI) to achieve more objective evaluations. Through experimental analysis, we reveal that GI outperforms existing evaluation methods in terms of human alignment, and even shows comparable performance with GPT3.5, only with BART-large.
%U https://aclanthology.org/2024.findings-eacl.145
%P 2185-2196
Markdown (Informal)
[Generative Interpretation: Toward Human-Like Evaluation for Educational Question-Answer Pair Generation](https://aclanthology.org/2024.findings-eacl.145) (Moon et al., Findings 2024)
ACL