@article{chatterjee-etal-2025-effect,
  title     = {On the Effect of Instruction Tuning Loss on Generalization},
  author    = {Chatterjee, Anwoy and
               Kowndinya Renduchintala, H. S. V. N. S. and
               Bhatia, Sumit and
               Chakraborty, Tanmoy},
  journal   = {Transactions of the Association for Computational Linguistics},
  volume    = {13},
  year      = {2025},
  address   = {Cambridge, MA},
  publisher = {MIT Press},
  url       = {https://aclanthology.org/2025.tacl-1.62/},
  doi       = {10.1162/tacl.a.42},
  pages     = {1360--1380},
  abstract  = {Instruction tuning has emerged as a pivotal post-training paradigm that enables pre-trained language models to better follow user instructions. Despite its significance, little attention has been given to optimizing the loss function used. A fundamental, yet often overlooked, question is whether the conventional auto-regressive objective{---}where loss is computed only on response tokens, excluding prompt tokens{---}is truly optimal for instruction tuning. In this work, we systematically investigate the impact of differentially weighting prompt and response tokens in instruction tuning loss, and propose Weighted Instruction Tuning (WIT) as a better alternative to conventional instruction tuning. Through extensive experiments on five language models of different families and scale, three finetuning datasets of different sizes, and five diverse evaluation benchmarks, we show that the standard instruction tuning loss often yields suboptimal performance and limited robustness to input prompt variations. We find that a low-to-moderate weight for prompt tokens coupled with a moderate-to-high weight for response tokens yields the best-performing models across settings and also serves as a better starting point for the subsequent preference alignment training. These findings highlight the need to reconsider instruction-tuning loss and offer actionable insights for developing more robust and generalizable models. Our code is open-sourced here.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chatterjee-etal-2025-effect">
<titleInfo>
<title>On the Effect of Instruction Tuning Loss on Generalization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anwoy</namePart>
<namePart type="family">Chatterjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">H</namePart>
<namePart type="given">S</namePart>
<namePart type="given">V</namePart>
<namePart type="given">N</namePart>
<namePart type="given">S</namePart>
<namePart type="family">Kowndinya Renduchintala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sumit</namePart>
<namePart type="family">Bhatia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Instruction tuning has emerged as a pivotal post-training paradigm that enables pre-trained language models to better follow user instructions. Despite its significance, little attention has been given to optimizing the loss function used. A fundamental, yet often overlooked, question is whether the conventional auto-regressive objective—where loss is computed only on response tokens, excluding prompt tokens—is truly optimal for instruction tuning. In this work, we systematically investigate the impact of differentially weighting prompt and response tokens in instruction tuning loss, and propose Weighted Instruction Tuning (WIT) as a better alternative to conventional instruction tuning. Through extensive experiments on five language models of different families and scale, three finetuning datasets of different sizes, and five diverse evaluation benchmarks, we show that the standard instruction tuning loss often yields suboptimal performance and limited robustness to input prompt variations. We find that a low-to-moderate weight for prompt tokens coupled with a moderate-to-high weight for response tokens yields the best-performing models across settings and also serves as a better starting point for the subsequent preference alignment training. These findings highlight the need to reconsider instruction-tuning loss and offer actionable insights for developing more robust and generalizable models. Our code is open-sourced here.</abstract>
<identifier type="citekey">chatterjee-etal-2025-effect</identifier>
<identifier type="doi">10.1162/tacl.a.42</identifier>
<location>
<url>https://aclanthology.org/2025.tacl-1.62/</url>
</location>
<part>
<date>2025</date>
<detail type="volume"><number>13</number></detail>
<extent unit="page">
<start>1360</start>
<end>1380</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T On the Effect of Instruction Tuning Loss on Generalization
%A Chatterjee, Anwoy
%A Kowndinya Renduchintala, H. S. V. N. S.
%A Bhatia, Sumit
%A Chakraborty, Tanmoy
%J Transactions of the Association for Computational Linguistics
%D 2025
%V 13
%I MIT Press
%C Cambridge, MA
%F chatterjee-etal-2025-effect
%X Instruction tuning has emerged as a pivotal post-training paradigm that enables pre-trained language models to better follow user instructions. Despite its significance, little attention has been given to optimizing the loss function used. A fundamental, yet often overlooked, question is whether the conventional auto-regressive objective—where loss is computed only on response tokens, excluding prompt tokens—is truly optimal for instruction tuning. In this work, we systematically investigate the impact of differentially weighting prompt and response tokens in instruction tuning loss, and propose Weighted Instruction Tuning (WIT) as a better alternative to conventional instruction tuning. Through extensive experiments on five language models of different families and scale, three finetuning datasets of different sizes, and five diverse evaluation benchmarks, we show that the standard instruction tuning loss often yields suboptimal performance and limited robustness to input prompt variations. We find that a low-to-moderate weight for prompt tokens coupled with a moderate-to-high weight for response tokens yields the best-performing models across settings and also serves as a better starting point for the subsequent preference alignment training. These findings highlight the need to reconsider instruction-tuning loss and offer actionable insights for developing more robust and generalizable models. Our code is open-sourced here.
%R 10.1162/tacl.a.42
%U https://aclanthology.org/2025.tacl-1.62/
%U https://doi.org/10.1162/tacl.a.42
%P 1360-1380
Markdown (Informal)
[On the Effect of Instruction Tuning Loss on Generalization](https://aclanthology.org/2025.tacl-1.62/) (Chatterjee et al., TACL 2025)
ACL