@inproceedings{krishnamoorthy-etal-2025-multi,
title = "Multi-Agent Reinforcement Learning for Interactive Code Debugging with Human Feedback and Memory",
author = "Krishnamoorthy, Anjana and
Ivatury, Kartik and
Ahmadnia, Benyamin",
editor = "Angelova, Galia and
Kunilovskaya, Maria and
Escribe, Marie and
Mitkov, Ruslan",
booktitle = "Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era",
month = sep,
year = "2025",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd., Shoumen, Bulgaria",
url = "https://aclanthology.org/2025.ranlp-1.70/",
pages = "595--603",
abstract = "This paper introduces an interactive Python debugging framework that combines multi-agent reinforcement learning, Natural Language Processing (NLP), and long-term memory. Two Proximal Policy Optimization (PPO) agents specialize in syntax and logic errors, generating candidate fixes that developers can accept, reject, or refine. A BERT-based module encodes natural language feedback into dense embeddings and quality scores, which shape reward signals for Reinforcement Learning from Human Feedback (RLHF). To support personalization, the system uses dual FAISS indices to retrieve past fixes based on code-error pairs and developer explanations. Evaluated on a synthetic dataset of 200 Python programs, our approach achieves an 88{\%} syntax-fix rate and 45{\%} logic-fix rate within five suggestions{---}outperforming one-shot Large Language Model (LLM) baselines. In addition, the system improves the quality of the explanation, as measured by BLEU, ROUGE, and CodeBLEU. By integrating multi-agent specialization, linguistic feedback, and memory-driven retrieval, our framework delivers a more efficient, adaptive, and developer-aligned debugging experience."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="krishnamoorthy-etal-2025-multi">
<titleInfo>
<title>Multi-Agent Reinforcement Learning for Interactive Code Debugging with Human Feedback and Memory</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anjana</namePart>
<namePart type="family">Krishnamoorthy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kartik</namePart>
<namePart type="family">Ivatury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benyamin</namePart>
<namePart type="family">Ahmadnia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era</title>
</titleInfo>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Kunilovskaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie</namePart>
<namePart type="family">Escribe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd., Shoumen, Bulgaria</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper introduces an interactive Python debugging framework that combines multi-agent reinforcement learning, Natural Language Processing (NLP), and long-term memory. Two Proximal Policy Optimization (PPO) agents specialize in syntax and logic errors, generating candidate fixes that developers can accept, reject, or refine. A BERT-based module encodes natural language feedback into dense embeddings and quality scores, which shape reward signals for Reinforcement Learning from Human Feedback (RLHF). To support personalization, the system uses dual FAISS indices to retrieve past fixes based on code-error pairs and developer explanations. Evaluated on a synthetic dataset of 200 Python programs, our approach achieves an 88% syntax-fix rate and 45% logic-fix rate within five suggestions—outperforming one-shot Large Language Model (LLM) baselines. In addition, the system improves the quality of the explanation, as measured by BLEU, ROUGE, and CodeBLEU. By integrating multi-agent specialization, linguistic feedback, and memory-driven retrieval, our framework delivers a more efficient, adaptive, and developer-aligned debugging experience.</abstract>
<identifier type="citekey">krishnamoorthy-etal-2025-multi</identifier>
<location>
<url>https://aclanthology.org/2025.ranlp-1.70/</url>
</location>
<part>
<date>2025-09</date>
<extent unit="page">
<start>595</start>
<end>603</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multi-Agent Reinforcement Learning for Interactive Code Debugging with Human Feedback and Memory
%A Krishnamoorthy, Anjana
%A Ivatury, Kartik
%A Ahmadnia, Benyamin
%Y Angelova, Galia
%Y Kunilovskaya, Maria
%Y Escribe, Marie
%Y Mitkov, Ruslan
%S Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era
%D 2025
%8 September
%I INCOMA Ltd., Shoumen, Bulgaria
%C Varna, Bulgaria
%F krishnamoorthy-etal-2025-multi
%X This paper introduces an interactive Python debugging framework that combines multi-agent reinforcement learning, Natural Language Processing (NLP), and long-term memory. Two Proximal Policy Optimization (PPO) agents specialize in syntax and logic errors, generating candidate fixes that developers can accept, reject, or refine. A BERT-based module encodes natural language feedback into dense embeddings and quality scores, which shape reward signals for Reinforcement Learning from Human Feedback (RLHF). To support personalization, the system uses dual FAISS indices to retrieve past fixes based on code-error pairs and developer explanations. Evaluated on a synthetic dataset of 200 Python programs, our approach achieves an 88% syntax-fix rate and 45% logic-fix rate within five suggestions—outperforming one-shot Large Language Model (LLM) baselines. In addition, the system improves the quality of the explanation, as measured by BLEU, ROUGE, and CodeBLEU. By integrating multi-agent specialization, linguistic feedback, and memory-driven retrieval, our framework delivers a more efficient, adaptive, and developer-aligned debugging experience.
%U https://aclanthology.org/2025.ranlp-1.70/
%P 595-603
Markdown (Informal)
[Multi-Agent Reinforcement Learning for Interactive Code Debugging with Human Feedback and Memory](https://aclanthology.org/2025.ranlp-1.70/) (Krishnamoorthy et al., RANLP 2025)
ACL
Anjana Krishnamoorthy, Kartik Ivatury, and Benyamin Ahmadnia. 2025. Multi-Agent Reinforcement Learning for Interactive Code Debugging with Human Feedback and Memory. In Proceedings of the 15th International Conference on Recent Advances in Natural Language Processing - Natural Language Processing in the Generative AI Era, pages 595–603, Varna, Bulgaria. INCOMA Ltd., Shoumen, Bulgaria.
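
As a rough illustration of the dual-index memory described in the abstract (retrieving past fixes both by code-error pairs and by developer explanations), the sketch below keeps two FAISS indices side by side. It is a hypothetical reconstruction, not the authors' implementation: the embed() helper, the 768-dimensional width, and the merge strategy are assumptions standing in for the paper's BERT-based encoder and retrieval logic.

import hashlib
import faiss  # pip install faiss-cpu
import numpy as np

DIM = 768  # assumed embedding width (BERT-base); the paper does not state it

def embed(text: str) -> np.ndarray:
    # Stand-in for the paper's BERT encoder: a deterministic pseudo-embedding.
    seed = int(hashlib.md5(text.encode()).hexdigest(), 16) % (2 ** 32)
    rng = np.random.default_rng(seed)
    return rng.standard_normal(DIM).astype("float32").reshape(1, -1)

# Two separate indices, mirroring the "dual FAISS indices" in the abstract.
code_error_index = faiss.IndexFlatL2(DIM)
explanation_index = faiss.IndexFlatL2(DIM)
memory: list[str] = []  # past fixes, aligned with the insertion order of both indices

def remember(code_error: str, explanation: str, fix: str) -> None:
    code_error_index.add(embed(code_error))
    explanation_index.add(embed(explanation))
    memory.append(fix)

def recall(code_error: str, explanation: str, k: int = 3) -> list[str]:
    # Query both indices and merge the hits, nearest first, dropping -1 padding.
    _, ids_a = code_error_index.search(embed(code_error), k)
    _, ids_b = explanation_index.search(embed(explanation), k)
    hits = [int(i) for i in np.concatenate([ids_a[0], ids_b[0]]) if i != -1]
    return [memory[i] for i in dict.fromkeys(hits)]  # de-duplicate, keep order

remember("x = [1, 2, 3  # SyntaxError", "missing closing bracket", "x = [1, 2, 3]")
print(recall("y = (4, 5  # SyntaxError", "unclosed parenthesis"))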