% Citation record: Zhao et al., Findings of ACL 2025 (doi 10.18653/v1/2025.findings-acl.780).
@inproceedings{zhao-etal-2025-task,
    title = "Task Knowledge Injection via Interpolations and Reinstatement for Large Language Model Generalization",
    author = "Zhao, Yukun and
      Yan, Lingyong and
      Li, Zhenyang and
      Wang, Shuaiqiang and
      Chen, Zhumin and
      Ren, Zhaochun and
      Yin, Dawei",
    editor = "Che, Wanxiang and
      Nabende, Joyce and
      Shutova, Ekaterina and
      Pilehvar, Mohammad Taher",
    booktitle = "Findings of the Association for Computational Linguistics: ACL 2025",
    month = jul,
    year = "2025",
    address = "Vienna, Austria",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.findings-acl.780/",
    doi = "10.18653/v1/2025.findings-acl.780",
    pages = "15070--15080",
    isbn = "979-8-89176-256-5",
    abstract = "Large language models have shown tremendous potential across various NLP tasks, and instruction tuning has been widely adopted to elicit their superior performance. However, instruction tuning may overly tailor the models to task-specific formats, potentially compromising their generalization on unseen tasks. We attribute the issue to the spurious correlations learned between inputs and targets. We propose explicit task knowledge injection to mitigate these shortcuts with latent task adaptation and knowledge reinstatement. Latent tasks serve as interpolations between new tasks and facilitate knowledge sharing with joint adaptation enabling the model to build task knowledge more smoothly. Knowledge reinstatement helps optimize building new knowledge with prior knowledge. Specifically, we retrieve input-relevant latent tasks and jointly learn the task and the relevant latent tasks. Moreover, we prompt the model to recall the forms of inputs corresponding to the target and build the task knowledge through the reinstatement of prior knowledge while learning the new task. We conduct extensive experiments on state-of-the-art large language models including Llama3.1-8B and Vicuna-13B across 1000+ instruction-following tasks to demonstrate the effectiveness of our method. The results demonstrate our method improves generalization on both in-domain and out-of-domain unseen tasks."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhao-etal-2025-task">
<titleInfo>
<title>Task Knowledge Injection via Interpolations and Reinstatement for Large Language Model Generalization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yukun</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lingyong</namePart>
<namePart type="family">Yan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhenyang</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shuaiqiang</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhumin</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhaochun</namePart>
<namePart type="family">Ren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dawei</namePart>
<namePart type="family">Yin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-256-5</identifier>
</relatedItem>
<abstract>Large language models have shown tremendous potential across various NLP tasks, and instruction tuning has been widely adopted to elicit their superior performance. However, instruction tuning may overly tailor the models to task-specific formats, potentially compromising their generalization on unseen tasks. We attribute the issue to the spurious correlations learned between inputs and targets. We propose explicit task knowledge injection to mitigate these shortcuts with latent task adaptation and knowledge reinstatement. Latent tasks serve as interpolations between new tasks and facilitate knowledge sharing with joint adaptation enabling the model to build task knowledge more smoothly. Knowledge reinstatement helps optimize building new knowledge with prior knowledge. Specifically, we retrieve input-relevant latent tasks and jointly learn the task and the relevant latent tasks. Moreover, we prompt the model to recall the forms of inputs corresponding to the target and build the task knowledge through the reinstatement of prior knowledge while learning the new task. We conduct extensive experiments on state-of-the-art large language models including Llama3.1-8B and Vicuna-13B across 1000+ instruction-following tasks to demonstrate the effectiveness of our method. The results demonstrate our method improves generalization on both in-domain and out-of-domain unseen tasks.</abstract>
<identifier type="citekey">zhao-etal-2025-task</identifier>
<identifier type="doi">10.18653/v1/2025.findings-acl.780</identifier>
<location>
<url>https://aclanthology.org/2025.findings-acl.780/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>15070</start>
<end>15080</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Task Knowledge Injection via Interpolations and Reinstatement for Large Language Model Generalization
%A Zhao, Yukun
%A Yan, Lingyong
%A Li, Zhenyang
%A Wang, Shuaiqiang
%A Chen, Zhumin
%A Ren, Zhaochun
%A Yin, Dawei
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F zhao-etal-2025-task
%X Large language models have shown tremendous potential across various NLP tasks, and instruction tuning has been widely adopted to elicit their superior performance. However, instruction tuning may overly tailor the models to task-specific formats, potentially compromising their generalization on unseen tasks. We attribute the issue to the spurious correlations learned between inputs and targets. We propose explicit task knowledge injection to mitigate these shortcuts with latent task adaptation and knowledge reinstatement. Latent tasks serve as interpolations between new tasks and facilitate knowledge sharing with joint adaptation enabling the model to build task knowledge more smoothly. Knowledge reinstatement helps optimize building new knowledge with prior knowledge. Specifically, we retrieve input-relevant latent tasks and jointly learn the task and the relevant latent tasks. Moreover, we prompt the model to recall the forms of inputs corresponding to the target and build the task knowledge through the reinstatement of prior knowledge while learning the new task. We conduct extensive experiments on state-of-the-art large language models including Llama3.1-8B and Vicuna-13B across 1000+ instruction-following tasks to demonstrate the effectiveness of our method. The results demonstrate our method improves generalization on both in-domain and out-of-domain unseen tasks.
%R 10.18653/v1/2025.findings-acl.780
%U https://aclanthology.org/2025.findings-acl.780/
%U https://doi.org/10.18653/v1/2025.findings-acl.780
%P 15070-15080
Markdown (Informal)
[Task Knowledge Injection via Interpolations and Reinstatement for Large Language Model Generalization](https://aclanthology.org/2025.findings-acl.780/) (Zhao et al., Findings 2025)
ACL