% Conference paper record exported from the ACL Anthology (see the url field).
% Author names use the unambiguous "Family, Given" form. The upstream export
% had the two parts swapped for the authors (e.g. "Xianpei, Han", although the
% editor field of this same entry lists that person as "Han, Xianpei"), which
% made BibTeX treat given names as surnames.
% NOTE(review): "Liu, Cao" is kept exactly as exported. Both parts are common
% family names, so the right order cannot be decided from this record alone;
% confirm against the paper before changing it.
% The citation key is intentionally unchanged so existing citations still resolve.
@inproceedings{chunkang-etal-2024-pattern,
  title     = {Pattern Shifting or Knowledge Losing? A Forgetting Perspective for Understanding the Effect of Instruction Fine-Tuning},
  author    = {Zhang, Chunkang and
               Cao, Boxi and
               Lu, Yaojie and
               Lin, Hongyu and
               Liu, Cao and
               Zeng, Ke and
               Wan, Guanglu and
               Cai, Xunliang and
               Han, Xianpei and
               Sun, Le},
  editor    = {Sun, Maosong and
               Liang, Jiye and
               Han, Xianpei and
               Liu, Zhiyuan and
               He, Yulan},
  booktitle = {Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 1: Main Conference)},
  month     = jul,
  year      = {2024},
  address   = {Taiyuan, China},
  publisher = {Chinese Information Processing Society of China},
  url       = {https://aclanthology.org/2024.ccl-1.106/},
  pages     = {1381--1394},
  language  = {eng},
  abstract  = {Instruction Fine-Tuning (IFT) emerges as an essential step of training large language models to robustly carry out tasks of interest. However, there lacks a systematic investigation about the underlying mechanisms of instruction fine-tuning, particularly on the forgetting phenomenon after IFT, known as alignment tax. Therefore, to understand the mechanism of IFT from the forgetting perspective, we investigate the alternation of the text pattern and knowledge within models throughout the entire IFT process. Specifically, we restore fine-tuned models to their base version by training them on the data sharing a similar distribution with the pre-training corpus and compare their results. Our experiment indicates that there is a stage transition of forgetting during IFT process: (1) Pseudo Forgetting: in this stage, models mainly shift their familiar text pattern away from pre-training data format while the world knowledge is preserved. Consequently, models will recover to their original performance when they are restored to the base version. (2) Actual Forgetting: in this stage, models forget the acquired knowledge as well. Therefore, they fail to reach the original performance even if they are restored to the base version.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="chunkang-etal-2024-pattern">
    <!--
      MODS record for one conference paper; the host proceedings volume is
      described in the relatedItem element below.
      The author namePart values were swapped in the upstream export (the given
      name was tagged as family and vice versa). The editor entry for Xianpei Han
      in the host item of this same record shows the intended tagging, and the
      author entries now follow it.
      NOTE(review): the author tagged given "Cao" / family "Liu" is kept exactly
      as exported. Both parts are common family names, so the order cannot be
      decided from this record alone; confirm against the paper.
      The record ID and citekey are intentionally unchanged.
    -->
    <titleInfo>
      <title>Pattern Shifting or Knowledge Losing? A Forgetting Perspective for Understanding the Effect of Instruction Fine-Tuning</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Chunkang</namePart>
      <namePart type="family">Zhang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Boxi</namePart>
      <namePart type="family">Cao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yaojie</namePart>
      <namePart type="family">Lu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hongyu</namePart>
      <namePart type="family">Lin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <!-- NOTE(review): kept as exported; name order not verifiable from this record. -->
      <namePart type="given">Cao</namePart>
      <namePart type="family">Liu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ke</namePart>
      <namePart type="family">Zeng</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Guanglu</namePart>
      <namePart type="family">Wan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xunliang</namePart>
      <namePart type="family">Cai</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xianpei</namePart>
      <namePart type="family">Han</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Le</namePart>
      <namePart type="family">Sun</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <language>
      <languageTerm type="text">eng</languageTerm>
    </language>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 1: Main Conference)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maosong</namePart>
        <namePart type="family">Sun</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiye</namePart>
        <namePart type="family">Liang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Xianpei</namePart>
        <namePart type="family">Han</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Zhiyuan</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yulan</namePart>
        <namePart type="family">He</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Chinese Information Processing Society of China</publisher>
        <place>
          <placeTerm type="text">Taiyuan, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Instruction Fine-Tuning (IFT) emerges as an essential step of training large language models to robustly carry out tasks of interest. However, there lacks a systematic investigation about the underlying mechanisms of instruction fine-tuning, particularly on the forgetting phenomenon after IFT, known as alignment tax. Therefore, to understand the mechanism of IFT from the forgetting perspective, we investigate the alternation of the text pattern and knowledge within models throughout the entire IFT process. Specifically, we restore fine-tuned models to their base version by training them on the data sharing a similar distribution with the pre-training corpus and compare their results. Our experiment indicates that there is a stage transition of forgetting during IFT process: (1) Pseudo Forgetting: in this stage, models mainly shift their familiar text pattern away from pre-training data format while the world knowledge is preserved. Consequently, models will recover to their original performance when they are restored to the base version. (2) Actual Forgetting: in this stage, models forget the acquired knowledge as well. Therefore, they fail to reach the original performance even if they are restored to the base version.</abstract>
    <identifier type="citekey">chunkang-etal-2024-pattern</identifier>
    <location>
      <url>https://aclanthology.org/2024.ccl-1.106/</url>
    </location>
    <part>
      <date>2024-07</date>
      <extent unit="page">
        <start>1381</start>
        <end>1394</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Pattern Shifting or Knowledge Losing? A Forgetting Perspective for Understanding the Effect of Instruction Fine-Tuning
%A Zhang, Chunkang
%A Cao, Boxi
%A Lu, Yaojie
%A Lin, Hongyu
%A Liu, Cao
%A Zeng, Ke
%A Wan, Guanglu
%A Cai, Xunliang
%A Han, Xianpei
%A Sun, Le
%Y Sun, Maosong
%Y Liang, Jiye
%Y Han, Xianpei
%Y Liu, Zhiyuan
%Y He, Yulan
%S Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 1: Main Conference)
%D 2024
%8 July
%I Chinese Information Processing Society of China
%C Taiyuan, China
%G eng
%F chunkang-etal-2024-pattern
%X Instruction Fine-Tuning (IFT) emerges as an essential step of training large language models to robustly carry out tasks of interest. However, there lacks a systematic investigation about the underlying mechanisms of instruction fine-tuning, particularly on the forgetting phenomenon after IFT, known as alignment tax. Therefore, to understand the mechanism of IFT from the forgetting perspective, we investigate the alternation of the text pattern and knowledge within models throughout the entire IFT process. Specifically, we restore fine-tuned models to their base version by training them on the data sharing a similar distribution with the pre-training corpus and compare their results. Our experiment indicates that there is a stage transition of forgetting during IFT process: (1) Pseudo Forgetting: in this stage, models mainly shift their familiar text pattern away from pre-training data format while the world knowledge is preserved. Consequently, models will recover to their original performance when they are restored to the base version. (2) Actual Forgetting: in this stage, models forget the acquired knowledge as well. Therefore, they fail to reach the original performance even if they are restored to the base version.
%U https://aclanthology.org/2024.ccl-1.106/
%P 1381-1394
Markdown (Informal)
[Pattern Shifting or Knowledge Losing? A Forgetting Perspective for Understanding the Effect of Instruction Fine-Tuning](https://aclanthology.org/2024.ccl-1.106/) (Zhang et al., CCL 2024)
ACL
- Chunkang Zhang, Boxi Cao, Yaojie Lu, Hongyu Lin, Cao Liu, Ke Zeng, Guanglu Wan, Xunliang Cai, Xianpei Han, and Le Sun. 2024. Pattern Shifting or Knowledge Losing? A Forgetting Perspective for Understanding the Effect of Instruction Fine-Tuning. In Proceedings of the 23rd Chinese National Conference on Computational Linguistics (Volume 1: Main Conference), pages 1381–1394, Taiyuan, China. Chinese Information Processing Society of China.