@inproceedings{larionov-etal-2023-team,
title = "Team {NLLG} submission for {E}val4{NLP} 2023 Shared Task: Retrieval-Augmented In-Context Learning for {NLG} Evaluation",
author = "Larionov, Daniil and
Viskov, Vasiliy and
Kokush, George and
Panchenko, Alexander and
Eger, Steffen",
editor = {Deutsch, Daniel and
Dror, Rotem and
Eger, Steffen and
Gao, Yang and
Leiter, Christoph and
Opitz, Juri and
R{\"u}ckl{\'e}, Andreas},
booktitle = "Proceedings of the 4th Workshop on Evaluation and Comparison of NLP Systems",
month = nov,
year = "2023",
address = "Bali, Indonesia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.eval4nlp-1.19",
doi = "10.18653/v1/2023.eval4nlp-1.19",
pages = "228--234",
abstract = "In this paper, we propose a retrieval-augmented in-context learning for natural language generation (NLG) evaluation. This method allows practitioners to utilize large language models (LLMs) for various NLG evaluation tasks without any fine-tuning. We apply our approach to Eval4NLP 2023 Shared Task in translation evaluation and summarization evaluation subtasks. The findings suggest that retrieval-augmented in-context learning is a promising approach for creating LLM-based evaluation metrics for NLG. Further research directions include exploring the performance of various publicly available LLM models and identifying which LLM properties help boost the quality of the metric.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="larionov-etal-2023-team">
<titleInfo>
<title>Team NLLG submission for Eval4NLP 2023 Shared Task: Retrieval-Augmented In-Context Learning for NLG Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daniil</namePart>
<namePart type="family">Larionov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vasiliy</namePart>
<namePart type="family">Viskov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">George</namePart>
<namePart type="family">Kokush</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Panchenko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steffen</namePart>
<namePart type="family">Eger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Evaluation and Comparison of NLP Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Deutsch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rotem</namePart>
<namePart type="family">Dror</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steffen</namePart>
<namePart type="family">Eger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christoph</namePart>
<namePart type="family">Leiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juri</namePart>
<namePart type="family">Opitz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Rücklé</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bali, Indonesia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we propose a retrieval-augmented in-context learning for natural language generation (NLG) evaluation. This method allows practitioners to utilize large language models (LLMs) for various NLG evaluation tasks without any fine-tuning. We apply our approach to Eval4NLP 2023 Shared Task in translation evaluation and summarization evaluation subtasks. The findings suggest that retrieval-augmented in-context learning is a promising approach for creating LLM-based evaluation metrics for NLG. Further research directions include exploring the performance of various publicly available LLM models and identifying which LLM properties help boost the quality of the metric.</abstract>
<identifier type="citekey">larionov-etal-2023-team</identifier>
<identifier type="doi">10.18653/v1/2023.eval4nlp-1.19</identifier>
<location>
<url>https://aclanthology.org/2023.eval4nlp-1.19</url>
</location>
<part>
<date>2023-11</date>
<extent unit="page">
<start>228</start>
<end>234</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Team NLLG submission for Eval4NLP 2023 Shared Task: Retrieval-Augmented In-Context Learning for NLG Evaluation
%A Larionov, Daniil
%A Viskov, Vasiliy
%A Kokush, George
%A Panchenko, Alexander
%A Eger, Steffen
%Y Deutsch, Daniel
%Y Dror, Rotem
%Y Eger, Steffen
%Y Gao, Yang
%Y Leiter, Christoph
%Y Opitz, Juri
%Y Rücklé, Andreas
%S Proceedings of the 4th Workshop on Evaluation and Comparison of NLP Systems
%D 2023
%8 November
%I Association for Computational Linguistics
%C Bali, Indonesia
%F larionov-etal-2023-team
%X In this paper, we propose a retrieval-augmented in-context learning for natural language generation (NLG) evaluation. This method allows practitioners to utilize large language models (LLMs) for various NLG evaluation tasks without any fine-tuning. We apply our approach to Eval4NLP 2023 Shared Task in translation evaluation and summarization evaluation subtasks. The findings suggest that retrieval-augmented in-context learning is a promising approach for creating LLM-based evaluation metrics for NLG. Further research directions include exploring the performance of various publicly available LLM models and identifying which LLM properties help boost the quality of the metric.
%R 10.18653/v1/2023.eval4nlp-1.19
%U https://aclanthology.org/2023.eval4nlp-1.19
%U https://doi.org/10.18653/v1/2023.eval4nlp-1.19
%P 228-234
Markdown (Informal)
[Team NLLG submission for Eval4NLP 2023 Shared Task: Retrieval-Augmented In-Context Learning for NLG Evaluation](https://aclanthology.org/2023.eval4nlp-1.19) (Larionov et al., Eval4NLP-WS 2023)
ACL