@inproceedings{zhang-etal-2023-hit,
title = "{HIT}-{MI}{\&}{T} Lab{'}s Submission to {E}val4{NLP} 2023 Shared Task",
author = "Zhang, Rui and
Song, Fuhai and
Huang, Hui and
Yuan, Jinghao and
Yang, Muyun and
Zhao, Tiejun",
editor = {Deutsch, Daniel and
Dror, Rotem and
Eger, Steffen and
Gao, Yang and
Leiter, Christoph and
Opitz, Juri and
R{\"u}ckl{\'e}, Andreas},
booktitle = "Proceedings of the 4th Workshop on Evaluation and Comparison of NLP Systems",
month = nov,
year = "2023",
address = "Bali, Indonesia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.eval4nlp-1.11",
doi = "10.18653/v1/2023.eval4nlp-1.11",
pages = "139--148",
abstract = "Recently, Large Language Models (LLMs) have boosted the research in natural language processing and shown impressive capabilities across numerous domains, including machine translation evaluation. This paper presents our methods developed for the machine translation evaluation sub-task of the Eval4NLP 2023 Shared Task. Based on the provided LLMs, we propose a generation-based method as well as a probability-based method to perform evaluation, explore different strategies when selecting the demonstrations for in-context learning, and try different ensemble methods to further improve the evaluation accuracy. The experiment results on the development set and test set demonstrate the effectiveness of our proposed method.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhang-etal-2023-hit">
<titleInfo>
<title>HIT-MI&amp;T Lab’s Submission to Eval4NLP 2023 Shared Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rui</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fuhai</namePart>
<namePart type="family">Song</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hui</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jinghao</namePart>
<namePart type="family">Yuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Muyun</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tiejun</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Evaluation and Comparison of NLP Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Deutsch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rotem</namePart>
<namePart type="family">Dror</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steffen</namePart>
<namePart type="family">Eger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Gao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christoph</namePart>
<namePart type="family">Leiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juri</namePart>
<namePart type="family">Opitz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Rücklé</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bali, Indonesia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Recently, Large Language Models (LLMs) have boosted the research in natural language processing and shown impressive capabilities across numerous domains, including machine translation evaluation. This paper presents our methods developed for the machine translation evaluation sub-task of the Eval4NLP 2023 Shared Task. Based on the provided LLMs, we propose a generation-based method as well as a probability-based method to perform evaluation, explore different strategies when selecting the demonstrations for in-context learning, and try different ensemble methods to further improve the evaluation accuracy. The experiment results on the development set and test set demonstrate the effectiveness of our proposed method.</abstract>
<identifier type="citekey">zhang-etal-2023-hit</identifier>
<identifier type="doi">10.18653/v1/2023.eval4nlp-1.11</identifier>
<location>
<url>https://aclanthology.org/2023.eval4nlp-1.11</url>
</location>
<part>
<date>2023-11</date>
<extent unit="page">
<start>139</start>
<end>148</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T HIT-MI&T Lab’s Submission to Eval4NLP 2023 Shared Task
%A Zhang, Rui
%A Song, Fuhai
%A Huang, Hui
%A Yuan, Jinghao
%A Yang, Muyun
%A Zhao, Tiejun
%Y Deutsch, Daniel
%Y Dror, Rotem
%Y Eger, Steffen
%Y Gao, Yang
%Y Leiter, Christoph
%Y Opitz, Juri
%Y Rücklé, Andreas
%S Proceedings of the 4th Workshop on Evaluation and Comparison of NLP Systems
%D 2023
%8 November
%I Association for Computational Linguistics
%C Bali, Indonesia
%F zhang-etal-2023-hit
%X Recently, Large Language Models (LLMs) have boosted the research in natural language processing and shown impressive capabilities across numerous domains, including machine translation evaluation. This paper presents our methods developed for the machine translation evaluation sub-task of the Eval4NLP 2023 Shared Task. Based on the provided LLMs, we propose a generation-based method as well as a probability-based method to perform evaluation, explore different strategies when selecting the demonstrations for in-context learning, and try different ensemble methods to further improve the evaluation accuracy. The experiment results on the development set and test set demonstrate the effectiveness of our proposed method.
%R 10.18653/v1/2023.eval4nlp-1.11
%U https://aclanthology.org/2023.eval4nlp-1.11
%U https://doi.org/10.18653/v1/2023.eval4nlp-1.11
%P 139-148
Markdown (Informal)
[HIT-MI&T Lab’s Submission to Eval4NLP 2023 Shared Task](https://aclanthology.org/2023.eval4nlp-1.11) (Zhang et al., Eval4NLP-WS 2023)
ACL
- Rui Zhang, Fuhai Song, Hui Huang, Jinghao Yuan, Muyun Yang, and Tiejun Zhao. 2023. HIT-MI&T Lab’s Submission to Eval4NLP 2023 Shared Task. In Proceedings of the 4th Workshop on Evaluation and Comparison of NLP Systems, pages 139–148, Bali, Indonesia. Association for Computational Linguistics.