@inproceedings{dey-lal-2025-transferability,
title = "On the Transferability of Causal Knowledge for Language Models",
author = "Dey, Gourab and
Lal, Yash Kumar",
editor = "Clark, Elizabeth and
Lal, Yash Kumar and
Chaturvedi, Snigdha and
Iyyer, Mohit and
Brei, Anneliese and
Modi, Ashutosh and
Chandu, Khyathi Raghavi",
    booktitle = "Proceedings of the 7th Workshop on Narrative Understanding",
    month = may,
    year = "2025",
    address = "Albuquerque, New Mexico",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.wnu-1.3/",
    doi = "10.18653/v1/2025.wnu-1.3",
    pages = "8--14",
    ISBN = "979-8-89176-247-3",
abstract = "Language understanding includes identifying logical connections between events in a discourse, such as news and instructional text. We study the transferability of causal knowledge across these two domains by analyzing the extent to which understanding preconditions in narratives such as news articles can help models reason about cooking recipes, and vice-versa. Our experiments show that using instructions to pretrain small models on one domain before similarly finetuning it on the other shows a slight improvement over just finetuning it. We also find that finetuning the models on a mix of both types of data is better ({\textasciitilde}3-7{\%}) for understanding causal relations in instructional text. While we find that the improvements do not translate to larger or already instruction tuned models, our analysis highlights the aspects of a plan that are better captured through the interoperability of causal knowledge."
}

<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="dey-lal-2025-transferability">
    <titleInfo>
      <title>On the Transferability of Causal Knowledge for Language Models</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Gourab</namePart>
      <namePart type="family">Dey</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yash</namePart>
      <namePart type="given">Kumar</namePart>
      <namePart type="family">Lal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 7th Workshop on Narrative Understanding</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Elizabeth</namePart>
        <namePart type="family">Clark</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yash</namePart>
        <namePart type="given">Kumar</namePart>
        <namePart type="family">Lal</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Snigdha</namePart>
        <namePart type="family">Chaturvedi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohit</namePart>
        <namePart type="family">Iyyer</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anneliese</namePart>
        <namePart type="family">Brei</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ashutosh</namePart>
        <namePart type="family">Modi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Khyathi</namePart>
        <namePart type="given">Raghavi</namePart>
        <namePart type="family">Chandu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Albuquerque, New Mexico</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-247-3</identifier>
    </relatedItem>
    <abstract>Language understanding includes identifying logical connections between events in a discourse, such as news and instructional text. We study the transferability of causal knowledge across these two domains by analyzing the extent to which understanding preconditions in narratives such as news articles can help models reason about cooking recipes, and vice versa. Our experiments show that using instructions to pretrain small models on one domain before similarly finetuning them on the other yields a slight improvement over finetuning alone. We also find that finetuning the models on a mix of both types of data is better (~3-7%) for understanding causal relations in instructional text. While we find that the improvements do not translate to larger or already instruction-tuned models, our analysis highlights the aspects of a plan that are better captured through the interoperability of causal knowledge.</abstract>
<identifier type="citekey">dey-lal-2025-transferability</identifier>
<identifier type="doi">10.18653/v1/2025.wnu-1.3</identifier>
<location>
<url>https://aclanthology.org/2025.wnu-1.3/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>8</start>
<end>14</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On the Transferability of Causal Knowledge for Language Models
%A Dey, Gourab
%A Lal, Yash Kumar
%Y Clark, Elizabeth
%Y Lal, Yash Kumar
%Y Chaturvedi, Snigdha
%Y Iyyer, Mohit
%Y Brei, Anneliese
%Y Modi, Ashutosh
%Y Chandu, Khyathi Raghavi
%S Proceedings of the 7th Workshop on Narrative Understanding
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-247-3
%F dey-lal-2025-transferability
%X Language understanding includes identifying logical connections between events in a discourse, such as news and instructional text. We study the transferability of causal knowledge across these two domains by analyzing the extent to which understanding preconditions in narratives such as news articles can help models reason about cooking recipes, and vice versa. Our experiments show that using instructions to pretrain small models on one domain before similarly finetuning them on the other yields a slight improvement over finetuning alone. We also find that finetuning the models on a mix of both types of data is better (~3-7%) for understanding causal relations in instructional text. While we find that the improvements do not translate to larger or already instruction-tuned models, our analysis highlights the aspects of a plan that are better captured through the interoperability of causal knowledge.
%R 10.18653/v1/2025.wnu-1.3
%U https://aclanthology.org/2025.wnu-1.3/
%U https://doi.org/10.18653/v1/2025.wnu-1.3
%P 8-14