@inproceedings{baswani-etal-2023-ltrc-iiiths,
title = "{LTRC}{\_}{IIITH}{'}s 2023 Submission for Prompting Large Language Models as Explainable Metrics Task",
author = "Baswani, Pavan and
Mukherjee, Ananya and
Shrivastava, Manish",
editor = {Deutsch, Daniel and
Dror, Rotem and
Eger, Steffen and
Gao, Yang and
Leiter, Christoph and
Opitz, Juri and
R{\"u}ckl{\'e}, Andreas},
booktitle = "Proceedings of the 4th Workshop on Evaluation and Comparison of NLP Systems",
month = nov,
year = "2023",
address = "Bali, Indonesia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.eval4nlp-1.13",
doi = "10.18653/v1/2023.eval4nlp-1.13",
pages = "156--163",
abstract = "In this report, we share our contribution to the Eval4NLP Shared Task titled {``}Prompting Large Language Models as Explainable Metrics.{''} We build our prompts with a primary focus on effective prompting strategies, score-aggregation, and explainability for LLM-based metrics. We participated in the track for smaller models by submitting the scores along with their explanations. According to the Kendall correlation scores on the leaderboard, our MT evaluation submission ranks second-best, while our summarization evaluation submission ranks fourth, with only a 0.06 difference from the leading submission.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="baswani-etal-2023-ltrc-iiiths">
<titleInfo>
<title>LTRC_IIITH’s 2023 Submission for Prompting Large Language Models as Explainable Metrics Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pavan</namePart>
<namePart type="family">Baswani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ananya</namePart>
<namePart type="family">Mukherjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manish</namePart>
<namePart type="family">Shrivastava</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Evaluation and Comparison of NLP Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Deutsch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rotem</namePart>
<namePart type="family">Dror</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steffen</namePart>
<namePart type="family">Eger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christoph</namePart>
<namePart type="family">Leiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juri</namePart>
<namePart type="family">Opitz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Rücklé</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bali, Indonesia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this report, we share our contribution to the Eval4NLP Shared Task titled “Prompting Large Language Models as Explainable Metrics.” We build our prompts with a primary focus on effective prompting strategies, score-aggregation, and explainability for LLM-based metrics. We participated in the track for smaller models by submitting the scores along with their explanations. According to the Kendall correlation scores on the leaderboard, our MT evaluation submission ranks second-best, while our summarization evaluation submission ranks fourth, with only a 0.06 difference from the leading submission.</abstract>
<identifier type="citekey">baswani-etal-2023-ltrc-iiiths</identifier>
<identifier type="doi">10.18653/v1/2023.eval4nlp-1.13</identifier>
<location>
<url>https://aclanthology.org/2023.eval4nlp-1.13</url>
</location>
<part>
<date>2023-11</date>
<extent unit="page">
<start>156</start>
<end>163</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T LTRC_IIITH’s 2023 Submission for Prompting Large Language Models as Explainable Metrics Task
%A Baswani, Pavan
%A Mukherjee, Ananya
%A Shrivastava, Manish
%Y Deutsch, Daniel
%Y Dror, Rotem
%Y Eger, Steffen
%Y Gao, Yang
%Y Leiter, Christoph
%Y Opitz, Juri
%Y Rücklé, Andreas
%S Proceedings of the 4th Workshop on Evaluation and Comparison of NLP Systems
%D 2023
%8 November
%I Association for Computational Linguistics
%C Bali, Indonesia
%F baswani-etal-2023-ltrc-iiiths
%X In this report, we share our contribution to the Eval4NLP Shared Task titled “Prompting Large Language Models as Explainable Metrics.” We build our prompts with a primary focus on effective prompting strategies, score-aggregation, and explainability for LLM-based metrics. We participated in the track for smaller models by submitting the scores along with their explanations. According to the Kendall correlation scores on the leaderboard, our MT evaluation submission ranks second-best, while our summarization evaluation submission ranks fourth, with only a 0.06 difference from the leading submission.
%R 10.18653/v1/2023.eval4nlp-1.13
%U https://aclanthology.org/2023.eval4nlp-1.13
%U https://doi.org/10.18653/v1/2023.eval4nlp-1.13
%P 156-163
Markdown (Informal)
[LTRC_IIITH’s 2023 Submission for Prompting Large Language Models as Explainable Metrics Task](https://aclanthology.org/2023.eval4nlp-1.13) (Baswani et al., Eval4NLP-WS 2023)
ACL