@inproceedings{wu-etal-2025-seeval,
title = "{SEE}val: Advancing {LLM} Text Evaluation Efficiency and Accuracy through Self-Explanation Prompting",
author = "Wu, Meng-Chen and
Hossain, Md Mosharaf and
Wood, Tess and
Akbar, Shayan Ali and
Chin, Si-Chi and
Cornejo, Erwin",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2025",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-naacl.411/",
doi = "10.18653/v1/2025.findings-naacl.411",
pages = "7357--7368",
ISBN = "979-8-89176-195-7",
abstract = "Large language models (LLMs) have achieved remarkable success in various natural language generation (NLG) tasks, but their performance in automatic text evaluation is not yet ready as human replacements. In this paper, we propose SEEval (Self-Explanation in Evaluation), a novel prompt-based text evaluator. Inspired by educational psychology, SEEval incorporates self-explanation, a metacognitive strategy, to enhance automatic text evaluation. Our experimental results show that SEEval, without probability normalization, is able to achieve competitive and often superior performance compared to the two state-of-the-art baselines {--} G-Eval and Analyze-Rate {--} across all evaluation dimensions and is 20 times more efficient in terms of run-time. The SEEval method is also generalizable as its results are consistent across three other selected LLMs {--} Claude 3.5 Sonnet, Command R+, and Mistral-Large 2."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="wu-etal-2025-seeval">
    <titleInfo>
      <title>SEEval: Advancing LLM Text Evaluation Efficiency and Accuracy through Self-Explanation Prompting</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Meng-Chen</namePart>
      <namePart type="family">Wu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Md</namePart>
      <namePart type="given">Mosharaf</namePart>
      <namePart type="family">Hossain</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tess</namePart>
      <namePart type="family">Wood</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shayan</namePart>
      <namePart type="given">Ali</namePart>
      <namePart type="family">Akbar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Si-Chi</namePart>
      <namePart type="family">Chin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Erwin</namePart>
      <namePart type="family">Cornejo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: NAACL 2025</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Luis</namePart>
        <namePart type="family">Chiruzzo</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alan</namePart>
        <namePart type="family">Ritter</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lu</namePart>
        <namePart type="family">Wang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Albuquerque, New Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-195-7</identifier>
    </relatedItem>
    <abstract>Large language models (LLMs) have achieved remarkable success in various natural language generation (NLG) tasks, but their performance in automatic text evaluation is not yet ready as human replacements. In this paper, we propose SEEval (Self-Explanation in Evaluation), a novel prompt-based text evaluator. Inspired by educational psychology, SEEval incorporates self-explanation, a metacognitive strategy, to enhance automatic text evaluation. Our experimental results show that SEEval, without probability normalization, is able to achieve competitive and often superior performance compared to the two state-of-the-art baselines – G-Eval and Analyze-Rate – across all evaluation dimensions and is 20 times more efficient in terms of run-time. The SEEval method is also generalizable as its results are consistent across three other selected LLMs – Claude 3.5 Sonnet, Command R+, and Mistral-Large 2.</abstract>
    <identifier type="citekey">wu-etal-2025-seeval</identifier>
    <identifier type="doi">10.18653/v1/2025.findings-naacl.411</identifier>
    <location>
      <url>https://aclanthology.org/2025.findings-naacl.411/</url>
    </location>
    <part>
      <date>2025-04</date>
      <extent unit="page">
        <start>7357</start>
        <end>7368</end>
      </extent>
    </part>
  </mods>
</modsCollection>

%0 Conference Proceedings
%T SEEval: Advancing LLM Text Evaluation Efficiency and Accuracy through Self-Explanation Prompting
%A Wu, Meng-Chen
%A Hossain, Md Mosharaf
%A Wood, Tess
%A Akbar, Shayan Ali
%A Chin, Si-Chi
%A Cornejo, Erwin
%Y Chiruzzo, Luis
%Y Ritter, Alan
%Y Wang, Lu
%S Findings of the Association for Computational Linguistics: NAACL 2025
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-195-7
%F wu-etal-2025-seeval
%X Large language models (LLMs) have achieved remarkable success in various natural language generation (NLG) tasks, but their performance in automatic text evaluation is not yet ready as human replacements. In this paper, we propose SEEval (Self-Explanation in Evaluation), a novel prompt-based text evaluator. Inspired by educational psychology, SEEval incorporates self-explanation, a metacognitive strategy, to enhance automatic text evaluation. Our experimental results show that SEEval, without probability normalization, is able to achieve competitive and often superior performance compared to the two state-of-the-art baselines – G-Eval and Analyze-Rate – across all evaluation dimensions and is 20 times more efficient in terms of run-time. The SEEval method is also generalizable as its results are consistent across three other selected LLMs – Claude 3.5 Sonnet, Command R+, and Mistral-Large 2.
%R 10.18653/v1/2025.findings-naacl.411
%U https://aclanthology.org/2025.findings-naacl.411/
%U https://doi.org/10.18653/v1/2025.findings-naacl.411
%P 7357-7368

Markdown (Informal)
[SEEval: Advancing LLM Text Evaluation Efficiency and Accuracy through Self-Explanation Prompting](https://aclanthology.org/2025.findings-naacl.411/) (Wu et al., Findings 2025)
ACL
Meng-Chen Wu, Md Mosharaf Hossain, Tess Wood, Shayan Ali Akbar, Si-Chi Chin, and Erwin Cornejo. 2025. SEEval: Advancing LLM Text Evaluation Efficiency and Accuracy through Self-Explanation Prompting. In Findings of the Association for Computational Linguistics: NAACL 2025, pages 7357–7368, Albuquerque, New Mexico. Association for Computational Linguistics.