@inproceedings{chi-etal-2025-visualcoder,
title = "{V}isual{C}oder: Guiding Large Language Models in Code Execution with Fine-grained Multimodal Chain-of-Thought Reasoning",
author = "Chi, Cuong Le and
Hoang, Chau Truong Vinh and
Huy, Phan Nhật and
Le, Dung D. and
Nguyen, Tien N and
Bui, Nghi D. Q.",
editor = "Chiruzzo, Luis and
Ritter, Alan and
Wang, Lu",
booktitle = "Findings of the Association for Computational Linguistics: NAACL 2025",
month = apr,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-naacl.370/",
doi = "10.18653/v1/2025.findings-naacl.370",
pages = "6628--6645",
ISBN = "979-8-89176-195-7",
abstract = "Predicting program behavior and reasoning about code execution remain significant challenges in software engineering, particularly for large language models (LLMs) designed for code analysis. While these models excel at understanding static syntax, they often struggle with dynamic reasoning tasks. We introduce VisualCoder, a simple yet effective approach that enhances code reasoning by integrating multimodal Chain-of-Thought (CoT) reasoning with a visual Control Flow Graph (CFG). By aligning code snippets with their corresponding CFGs, VisualCoder provides deeper insights into execution flows. We address challenges in multimodal CoT integration through a reference mechanism, ensuring consistency between code and its execution path, thereby improving performance in program behavior prediction, error detection, and output generation."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chi-etal-2025-visualcoder">
<titleInfo>
<title>VisualCoder: Guiding Large Language Models in Code Execution with Fine-grained Multimodal Chain-of-Thought Reasoning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Cuong</namePart>
<namePart type="given">Le</namePart>
<namePart type="family">Chi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chau</namePart>
<namePart type="given">Truong</namePart>
<namePart type="given">Vinh</namePart>
<namePart type="family">Hoang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Phan</namePart>
<namePart type="given">Nhật</namePart>
<namePart type="family">Huy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dung</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Le</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tien</namePart>
<namePart type="given">N</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nghi</namePart>
<namePart type="given">D</namePart>
<namePart type="given">Q</namePart>
<namePart type="family">Bui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: NAACL 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Chiruzzo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lu</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-195-7</identifier>
</relatedItem>
<abstract>Predicting program behavior and reasoning about code execution remain significant challenges in software engineering, particularly for large language models (LLMs) designed for code analysis. While these models excel at understanding static syntax, they often struggle with dynamic reasoning tasks. We introduce VisualCoder, a simple yet effective approach that enhances code reasoning by integrating multimodal Chain-of-Thought (CoT) reasoning with a visual Control Flow Graph (CFG). By aligning code snippets with their corresponding CFGs, VisualCoder provides deeper insights into execution flows. We address challenges in multimodal CoT integration through a reference mechanism, ensuring consistency between code and its execution path, thereby improving performance in program behavior prediction, error detection, and output generation.</abstract>
<identifier type="citekey">chi-etal-2025-visualcoder</identifier>
<identifier type="doi">10.18653/v1/2025.findings-naacl.370</identifier>
<location>
<url>https://aclanthology.org/2025.findings-naacl.370/</url>
</location>
<part>
<date>2025-04</date>
<extent unit="page">
<start>6628</start>
<end>6645</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T VisualCoder: Guiding Large Language Models in Code Execution with Fine-grained Multimodal Chain-of-Thought Reasoning
%A Chi, Cuong Le
%A Hoang, Chau Truong Vinh
%A Huy, Phan Nhật
%A Le, Dung D.
%A Nguyen, Tien N.
%A Bui, Nghi D. Q.
%Y Chiruzzo, Luis
%Y Ritter, Alan
%Y Wang, Lu
%S Findings of the Association for Computational Linguistics: NAACL 2025
%D 2025
%8 April
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-195-7
%F chi-etal-2025-visualcoder
%X Predicting program behavior and reasoning about code execution remain significant challenges in software engineering, particularly for large language models (LLMs) designed for code analysis. While these models excel at understanding static syntax, they often struggle with dynamic reasoning tasks. We introduce VisualCoder, a simple yet effective approach that enhances code reasoning by integrating multimodal Chain-of-Thought (CoT) reasoning with a visual Control Flow Graph (CFG). By aligning code snippets with their corresponding CFGs, VisualCoder provides deeper insights into execution flows. We address challenges in multimodal CoT integration through a reference mechanism, ensuring consistency between code and its execution path, thereby improving performance in program behavior prediction, error detection, and output generation.
%R 10.18653/v1/2025.findings-naacl.370
%U https://aclanthology.org/2025.findings-naacl.370/
%U https://doi.org/10.18653/v1/2025.findings-naacl.370
%P 6628-6645
Markdown (Informal)
[VisualCoder: Guiding Large Language Models in Code Execution with Fine-grained Multimodal Chain-of-Thought Reasoning](https://aclanthology.org/2025.findings-naacl.370/) (Chi et al., Findings 2025)
ACL