@inproceedings{gupta-etal-2025-codescm,
title = "{C}ode{SCM}: Causal Analysis for Multi-Modal Code Generation",
author = "Gupta, Mukur and
Bhatt, Noopur and
Jana, Suman",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.naacl-long.345/",
doi = "10.18653/v1/2025.naacl-long.345",
pages = "6779--6793",
ISBN = "979-8-89176-189-6",
abstract = "In this paper, we propose CodeSCM, a Structural Causal Model (SCM) for analyzing multi-modal code generation using large language models (LLMs). By applying interventions to CodeSCM, we measure the causal effects of different prompt modalities, such as natural language, code, and input-output examples, on the model. CodeSCM introduces latent mediator variables to separate the code and natural language semantics of a multi-modal code generation prompt. Using the principles of Causal Mediation Analysis on these mediators we quantify direct effects representing the model{'}s spurious leanings. We find that, in addition to natural language instructions, input-output examples significantly influence code generation."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gupta-etal-2025-codescm">
<titleInfo>
<title>CodeSCM: Causal Analysis for Multi-Modal Code Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mukur</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Noopur</namePart>
<namePart type="family">Bhatt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Suman</namePart>
<namePart type="family">Jana</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Chiruzzo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-189-6</identifier>
</relatedItem>
<abstract>In this paper, we propose CodeSCM, a Structural Causal Model (SCM) for analyzing multi-modal code generation using large language models (LLMs). By applying interventions to CodeSCM, we measure the causal effects of different prompt modalities, such as natural language, code, and input-output examples, on the model. CodeSCM introduces latent mediator variables to separate the code and natural language semantics of a multi-modal code generation prompt. Using the principles of Causal Mediation Analysis on these mediators we quantify direct effects representing the model’s spurious leanings. We find that, in addition to natural language instructions, input-output examples significantly influence code generation.</abstract>
<identifier type="citekey">gupta-etal-2025-codescm</identifier>
<identifier type="doi">10.18653/v1/2025.naacl-long.345</identifier>
<location>
<url>https://aclanthology.org/2025.naacl-long.345/</url>
</location>
<part>
<date>2025-04</date>
<extent unit="page">
<start>6779</start>
<end>6793</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CodeSCM: Causal Analysis for Multi-Modal Code Generation
%A Gupta, Mukur
%A Bhatt, Noopur
%A Jana, Suman
%Y Chiruzzo, Luis
%Y Ritter, Alan
%Y Wang, Lu
%S Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-189-6
%F gupta-etal-2025-codescm
%X In this paper, we propose CodeSCM, a Structural Causal Model (SCM) for analyzing multi-modal code generation using large language models (LLMs). By applying interventions to CodeSCM, we measure the causal effects of different prompt modalities, such as natural language, code, and input-output examples, on the model. CodeSCM introduces latent mediator variables to separate the code and natural language semantics of a multi-modal code generation prompt. Using the principles of Causal Mediation Analysis on these mediators we quantify direct effects representing the model’s spurious leanings. We find that, in addition to natural language instructions, input-output examples significantly influence code generation.
%R 10.18653/v1/2025.naacl-long.345
%U https://aclanthology.org/2025.naacl-long.345/
%U https://doi.org/10.18653/v1/2025.naacl-long.345
%P 6779-6793
Markdown (Informal)
[CodeSCM: Causal Analysis for Multi-Modal Code Generation](https://aclanthology.org/2025.naacl-long.345/) (Gupta et al., NAACL 2025)
ACL
- Mukur Gupta, Noopur Bhatt, and Suman Jana. 2025. CodeSCM: Causal Analysis for Multi-Modal Code Generation. In Proceedings of the 2025 Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers), pages 6779–6793, Albuquerque, New Mexico. Association for Computational Linguistics.