@inproceedings{duan-etal-2024-exploring,
title = "Exploring the Relationship between In-Context Learning and Instruction Tuning",
author = "Duan, Hanyu and
Tang, Yixuan and
Yang, Yi and
Abbasi, Ahmed and
Tam, Kar Yan",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.182",
pages = "3197--3210",
abstract = "In-Context Learning (ICL) and Instruction Tuning (IT) are two primary paradigms of adopting Large Language Models (LLMs) to downstream applications. However, they are significantly different. In ICL, a set of demonstrations is provided at the inference time, but the LLM{'}s parameters are not updated. In IT, a set of demonstrations is used to adjust the parameters of the LLM during training, but no demonstrations are provided at the inference time. Although a growing body of literature has explored ICL and IT, studies on these topics have largely been conducted in isolation, leading to a disconnect between these two paradigms. In this work, we explore the relationship between ICL and IT by examining how the hidden states of LLMs change in these two paradigms. Through carefully designed experiments conducted with LLaMA-2 and LLaMA-2-Chat (7B and 13B), we find that ICL and IT converge in LLM hidden states despite their apparent differences in implementation. Specifically, ICL changes an LLM{'}s hidden states as if its accompanying demonstrations were used to instructionally tune the model. Furthermore, the convergence between ICL and IT is largely contingent upon several factors related to the demonstration. Overall, this work offers a unique perspective to explore the connection between ICL and IT and sheds light on understanding the behaviors of LLMs.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="duan-etal-2024-exploring">
<titleInfo>
<title>Exploring the Relationship between In-Context Learning and Instruction Tuning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hanyu</namePart>
<namePart type="family">Duan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yixuan</namePart>
<namePart type="family">Tang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Abbasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kar</namePart>
<namePart type="given">Yan</namePart>
<namePart type="family">Tam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In-Context Learning (ICL) and Instruction Tuning (IT) are two primary paradigms of adopting Large Language Models (LLMs) to downstream applications. However, they are significantly different. In ICL, a set of demonstrations is provided at the inference time, but the LLM’s parameters are not updated. In IT, a set of demonstrations is used to adjust the parameters of the LLM during training, but no demonstrations are provided at the inference time. Although a growing body of literature has explored ICL and IT, studies on these topics have largely been conducted in isolation, leading to a disconnect between these two paradigms. In this work, we explore the relationship between ICL and IT by examining how the hidden states of LLMs change in these two paradigms. Through carefully designed experiments conducted with LLaMA-2 and LLaMA-2-Chat (7B and 13B), we find that ICL and IT converge in LLM hidden states despite their apparent differences in implementation. Specifically, ICL changes an LLM’s hidden states as if its accompanying demonstrations were used to instructionally tune the model. Furthermore, the convergence between ICL and IT is largely contingent upon several factors related to the demonstration. Overall, this work offers a unique perspective to explore the connection between ICL and IT and sheds light on understanding the behaviors of LLMs.</abstract>
<identifier type="citekey">duan-etal-2024-exploring</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.182</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>3197</start>
<end>3210</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Exploring the Relationship between In-Context Learning and Instruction Tuning
%A Duan, Hanyu
%A Tang, Yixuan
%A Yang, Yi
%A Abbasi, Ahmed
%A Tam, Kar Yan
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F duan-etal-2024-exploring
%X In-Context Learning (ICL) and Instruction Tuning (IT) are two primary paradigms of adopting Large Language Models (LLMs) to downstream applications. However, they are significantly different. In ICL, a set of demonstrations is provided at the inference time, but the LLM’s parameters are not updated. In IT, a set of demonstrations is used to adjust the parameters of the LLM during training, but no demonstrations are provided at the inference time. Although a growing body of literature has explored ICL and IT, studies on these topics have largely been conducted in isolation, leading to a disconnect between these two paradigms. In this work, we explore the relationship between ICL and IT by examining how the hidden states of LLMs change in these two paradigms. Through carefully designed experiments conducted with LLaMA-2 and LLaMA-2-Chat (7B and 13B), we find that ICL and IT converge in LLM hidden states despite their apparent differences in implementation. Specifically, ICL changes an LLM’s hidden states as if its accompanying demonstrations were used to instructionally tune the model. Furthermore, the convergence between ICL and IT is largely contingent upon several factors related to the demonstration. Overall, this work offers a unique perspective to explore the connection between ICL and IT and sheds light on understanding the behaviors of LLMs.
%U https://aclanthology.org/2024.findings-emnlp.182
%P 3197-3210
Markdown (Informal)
[Exploring the Relationship between In-Context Learning and Instruction Tuning](https://aclanthology.org/2024.findings-emnlp.182) (Duan et al., Findings 2024)
ACL