@inproceedings{zhu-etal-2024-towards-robust,
title = "Towards Robust In-Context Learning for Machine Translation with Large Language Models",
author = "Zhu, Shaolin and
Cui, Menglong and
Xiong, Deyi",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.1444",
pages = "16619--16629",
abstract = "Using large language models (LLMs) for machine translation via in-context learning (ICL) has become an interesting research direction of machine translation (MT) in recent years. Its main idea is to retrieve a few translation pairs as demonstrations from an additional datastore (parallel corpus) to guide translation without updating the LLMs. However, the underlying noise of retrieved demonstrations usually dramatically deteriorate the performance of LLMs. In this paper, we propose a robust method to enable LLMs to achieve robust translation with ICL. The method incorporates a multi-view approach, considering both sentence- and word-level information, to select demonstrations that effectively avoid noise. At the sentence level, a margin-based score is designed to avoid semantic noise. At the word level, word embeddings are utilized to evaluate the related tokens and change the weight of words in demonstrations. By considering both sentence- and word-level similarity, the proposed method provides fine-grained demonstrations that effectively prompt the translation of LLMs. Experimental results demonstrate the effectiveness of our method, particularly in domain adaptation.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhu-etal-2024-towards-robust">
<titleInfo>
<title>Towards Robust In-Context Learning for Machine Translation with Large Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shaolin</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Menglong</namePart>
<namePart type="family">Cui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deyi</namePart>
<namePart type="family">Xiong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Using large language models (LLMs) for machine translation via in-context learning (ICL) has become an interesting research direction of machine translation (MT) in recent years. Its main idea is to retrieve a few translation pairs as demonstrations from an additional datastore (parallel corpus) to guide translation without updating the LLMs. However, the underlying noise of retrieved demonstrations usually dramatically deteriorate the performance of LLMs. In this paper, we propose a robust method to enable LLMs to achieve robust translation with ICL. The method incorporates a multi-view approach, considering both sentence- and word-level information, to select demonstrations that effectively avoid noise. At the sentence level, a margin-based score is designed to avoid semantic noise. At the word level, word embeddings are utilized to evaluate the related tokens and change the weight of words in demonstrations. By considering both sentence- and word-level similarity, the proposed method provides fine-grained demonstrations that effectively prompt the translation of LLMs. Experimental results demonstrate the effectiveness of our method, particularly in domain adaptation.</abstract>
<identifier type="citekey">zhu-etal-2024-towards-robust</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.1444</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>16619</start>
<end>16629</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Robust In-Context Learning for Machine Translation with Large Language Models
%A Zhu, Shaolin
%A Cui, Menglong
%A Xiong, Deyi
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F zhu-etal-2024-towards-robust
%X Using large language models (LLMs) for machine translation via in-context learning (ICL) has become an interesting research direction of machine translation (MT) in recent years. Its main idea is to retrieve a few translation pairs as demonstrations from an additional datastore (parallel corpus) to guide translation without updating the LLMs. However, the underlying noise of retrieved demonstrations usually dramatically deteriorate the performance of LLMs. In this paper, we propose a robust method to enable LLMs to achieve robust translation with ICL. The method incorporates a multi-view approach, considering both sentence- and word-level information, to select demonstrations that effectively avoid noise. At the sentence level, a margin-based score is designed to avoid semantic noise. At the word level, word embeddings are utilized to evaluate the related tokens and change the weight of words in demonstrations. By considering both sentence- and word-level similarity, the proposed method provides fine-grained demonstrations that effectively prompt the translation of LLMs. Experimental results demonstrate the effectiveness of our method, particularly in domain adaptation.
%U https://aclanthology.org/2024.lrec-main.1444
%P 16619-16629
Markdown (Informal)
[Towards Robust In-Context Learning for Machine Translation with Large Language Models](https://aclanthology.org/2024.lrec-main.1444) (Zhu et al., LREC-COLING 2024)
ACL