% ACL Anthology record: Findings of EMNLP 2024 paper on multilingual contrastive decoding.
% The abstract's inter-sentence spacing has been restored; all other field values are as exported.
@inproceedings{zhu-etal-2024-multilingual-contrastive,
  title     = {Multilingual Contrastive Decoding via Language-Agnostic Layers Skipping},
  author    = {Zhu, Wenhao and
               Liu, Sizhe and
               Huang, Shujian and
               She, Shuaijie and
               Wendler, Chris and
               Chen, Jiajun},
  editor    = {Al-Onaizan, Yaser and
               Bansal, Mohit and
               Chen, Yun-Nung},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2024},
  month     = nov,
  year      = {2024},
  address   = {Miami, Florida, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2024.findings-emnlp.512},
  pages     = {8775--8782},
  abstract  = {Decoding by contrasting layers (DoLa), is designed to improve the generation quality of large language models (LLMs) by contrasting the prediction probabilities between an early exit output (amateur logits) and the final output (expert logits). However, we find that this approach does not work well on non-English tasks. Inspired by previous interpretability work on language transition during the model{'}s forward pass, we discover that this issue arises from a language mismatch between early exit output and final output. In this work, we propose an improved contrastive decoding algorithm that is effective for diverse languages beyond English. To obtain more helpful amateur logits, we devise two strategies to skip a set of bottom, language-agnostic layers based on our preliminary analysis. Experimental results on multilingual reasoning benchmarks demonstrate that our proposed method outperforms previous contrastive decoding baselines and substantially improves LLM{'}s chain-of-thought reasoning accuracy across 11 languages.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhu-etal-2024-multilingual-contrastive">
<titleInfo>
<title>Multilingual Contrastive Decoding via Language-Agnostic Layers Skipping</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wenhao</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sizhe</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shujian</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shuaijie</namePart>
<namePart type="family">She</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Wendler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Decoding by contrasting layers (DoLa), is designed to improve the generation quality of large language models (LLMs) by contrasting the prediction probabilities between an early exit output (amateur logits) and the final output (expert logits). However, we find that this approach does not work well on non-English tasks. Inspired by previous interpretability work on language transition during the model’s forward pass, we discover that this issue arises from a language mismatch between early exit output and final output. In this work, we propose an improved contrastive decoding algorithm that is effective for diverse languages beyond English. To obtain more helpful amateur logits, we devise two strategies to skip a set of bottom, language-agnostic layers based on our preliminary analysis. Experimental results on multilingual reasoning benchmarks demonstrate that our proposed method outperforms previous contrastive decoding baselines and substantially improves LLM’s chain-of-thought reasoning accuracy across 11 languages.</abstract>
<identifier type="citekey">zhu-etal-2024-multilingual-contrastive</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.512</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>8775</start>
<end>8782</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multilingual Contrastive Decoding via Language-Agnostic Layers Skipping
%A Zhu, Wenhao
%A Liu, Sizhe
%A Huang, Shujian
%A She, Shuaijie
%A Wendler, Chris
%A Chen, Jiajun
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F zhu-etal-2024-multilingual-contrastive
%X Decoding by contrasting layers (DoLa), is designed to improve the generation quality of large language models (LLMs) by contrasting the prediction probabilities between an early exit output (amateur logits) and the final output (expert logits). However, we find that this approach does not work well on non-English tasks. Inspired by previous interpretability work on language transition during the model’s forward pass, we discover that this issue arises from a language mismatch between early exit output and final output. In this work, we propose an improved contrastive decoding algorithm that is effective for diverse languages beyond English. To obtain more helpful amateur logits, we devise two strategies to skip a set of bottom, language-agnostic layers based on our preliminary analysis. Experimental results on multilingual reasoning benchmarks demonstrate that our proposed method outperforms previous contrastive decoding baselines and substantially improves LLM’s chain-of-thought reasoning accuracy across 11 languages.
%U https://aclanthology.org/2024.findings-emnlp.512
%P 8775-8782
Markdown (Informal)
[Multilingual Contrastive Decoding via Language-Agnostic Layers Skipping](https://aclanthology.org/2024.findings-emnlp.512) (Zhu et al., Findings 2024)
ACL