BibTeX
@inproceedings{bhavya-etal-2024-long,
    title = "Long-Form Analogy Evaluation Challenge",
    author = "Bhavya, Bhavya and
      Palaguachi, Chris and
      Zhou, Yang and
      Bhat, Suma and
      Zhai, ChengXiang",
    editor = "Mille, Simon and
      Clinciu, Miruna-Adriana",
    booktitle = "Proceedings of the 17th International Natural Language Generation Conference: Generation Challenges",
    month = sep,
    year = "2024",
    address = "Tokyo, Japan",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.inlg-genchal.1",
    pages = "1--16",
    abstract = "Given the practical applications of analogies, recent work has studied analogy generation to explain concepts. However, not all generated analogies are of high quality, and it is unclear how to measure the quality of this new kind of generated text. To address this challenge, we propose a shared task on automatically evaluating the quality of generated analogies based on seven comprehensive criteria. For this, we will set up a leaderboard based on our dataset annotated with manual ratings along the seven criteria, and provide a baseline solution leveraging GPT-4. We hope that this task will advance the development of new evaluation metrics and methods for analogy generation in natural language, particularly for education.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="bhavya-etal-2024-long">
    <titleInfo>
      <title>Long-Form Analogy Evaluation Challenge</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Bhavya</namePart>
      <namePart type="family">Bhavya</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Chris</namePart>
      <namePart type="family">Palaguachi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yang</namePart>
      <namePart type="family">Zhou</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Suma</namePart>
      <namePart type="family">Bhat</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">ChengXiang</namePart>
      <namePart type="family">Zhai</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-09</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 17th International Natural Language Generation Conference: Generation Challenges</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Simon</namePart>
        <namePart type="family">Mille</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Miruna-Adriana</namePart>
        <namePart type="family">Clinciu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Tokyo, Japan</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Given the practical applications of analogies, recent work has studied analogy generation to explain concepts. However, not all generated analogies are of high quality, and it is unclear how to measure the quality of this new kind of generated text. To address this challenge, we propose a shared task on automatically evaluating the quality of generated analogies based on seven comprehensive criteria. For this, we will set up a leaderboard based on our dataset annotated with manual ratings along the seven criteria, and provide a baseline solution leveraging GPT-4. We hope that this task will advance the development of new evaluation metrics and methods for analogy generation in natural language, particularly for education.</abstract>
    <identifier type="citekey">bhavya-etal-2024-long</identifier>
    <location>
      <url>https://aclanthology.org/2024.inlg-genchal.1</url>
    </location>
    <part>
      <date>2024-09</date>
      <extent unit="page">
        <start>1</start>
        <end>16</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Long-Form Analogy Evaluation Challenge
%A Bhavya, Bhavya
%A Palaguachi, Chris
%A Zhou, Yang
%A Bhat, Suma
%A Zhai, ChengXiang
%Y Mille, Simon
%Y Clinciu, Miruna-Adriana
%S Proceedings of the 17th International Natural Language Generation Conference: Generation Challenges
%D 2024
%8 September
%I Association for Computational Linguistics
%C Tokyo, Japan
%F bhavya-etal-2024-long
%X Given the practical applications of analogies, recent work has studied analogy generation to explain concepts. However, not all generated analogies are of high quality, and it is unclear how to measure the quality of this new kind of generated text. To address this challenge, we propose a shared task on automatically evaluating the quality of generated analogies based on seven comprehensive criteria. For this, we will set up a leaderboard based on our dataset annotated with manual ratings along the seven criteria, and provide a baseline solution leveraging GPT-4. We hope that this task will advance the development of new evaluation metrics and methods for analogy generation in natural language, particularly for education.
%U https://aclanthology.org/2024.inlg-genchal.1
%P 1-16
Markdown (Informal)
[Long-Form Analogy Evaluation Challenge](https://aclanthology.org/2024.inlg-genchal.1) (Bhavya et al., INLG 2024)
ACL
Bhavya Bhavya, Chris Palaguachi, Yang Zhou, Suma Bhat, and ChengXiang Zhai. 2024. Long-Form Analogy Evaluation Challenge. In Proceedings of the 17th International Natural Language Generation Conference: Generation Challenges, pages 1–16, Tokyo, Japan. Association for Computational Linguistics.