@inproceedings{hong-etal-2026-hiras,
title = "{H}i{RAS}: A Hierarchical Multi-Agent Framework for Paper-to-Code Generation and Execution",
author = "Hong, Hanhua and
LI, Yizhi and
Chen, Jiaoyan and
Ananiadou, Sophia and
Li, Xiaoli and
Kim, Jung-jae and
Lin, Chenghua",
editor = "Liakata, Maria and
Moreira, Viviane P. and
Zhang, Jiajun and
Jurgens, David",
booktitle = "Findings of the {A}ssociation for {C}omputational {L}inguistics: {ACL} 2026",
month = jul,
year = "2026",
address = "San Diego, California, United States",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.findings-acl.377/",
pages = "7624--7652",
ISBN = "979-8-89176-395-1",
abstract = "Recent advances in large language models have highlighted their potential to automate computational research, particularly reproducing experimental results. However, existing approaches still use fixed sequential agent pipelines with weak global coordination, which limits their robustness and overall performance. In this work, we propose Hierarchical Research Agent System (HiRAS), a hierarchical multi-agent framework for end-to-end paper reproduction that employs supervisory manager agents to coordinate specialised agents across fine-grained stages. We also identify limitations in the reference-free evaluation of the Paper2Code benchmark and introduce Paper2Code-Extra (P2C-Ex), a refined protocol that incorporates repository-level information and better aligns with the original reference-based metric. We conduct extensive evaluation, validating the effectiveness and robustness of our proposed methods, and observing improvements, including {\ensuremath{>}}10{\%} relative performance gain above the previous state-of-the-art using open-source backbone models and significantly reduced hallucination in the evaluation. All code and data will be made publicly available."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hong-etal-2026-hiras">
<titleInfo>
<title>HiRAS: A Hierarchical Multi-Agent Framework for Paper-to-Code Generation and Execution</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hanhua</namePart>
<namePart type="family">Hong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yizhi</namePart>
<namePart type="family">LI</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiaoyan</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaoli</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jung-jae</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chenghua</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2026</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Liakata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Viviane</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jiajun</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Jurgens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">San Diego, California, United States</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-395-1</identifier>
</relatedItem>
<abstract>Recent advances in large language models have highlighted their potential to automate computational research, particularly reproducing experimental results. However, existing approaches still use fixed sequential agent pipelines with weak global coordination, which limits their robustness and overall performance. In this work, we propose Hierarchical Research Agent System (HiRAS), a hierarchical multi-agent framework for end-to-end paper reproduction that employs supervisory manager agents to coordinate specialised agents across fine-grained stages. We also identify limitations in the reference-free evaluation of the Paper2Code benchmark and introduce Paper2Code-Extra (P2C-Ex), a refined protocol that incorporates repository-level information and better aligns with the original reference-based metric. We conduct extensive evaluation, validating the effectiveness and robustness of our proposed methods, and observing improvements, including \ensuremath>10% relative performance gain above the previous state-of-the-art using open-source backbone models and significantly reduced hallucination in the evaluation. All code and data will be made publicly available.</abstract>
<identifier type="citekey">hong-etal-2026-hiras</identifier>
<location>
<url>https://aclanthology.org/2026.findings-acl.377/</url>
</location>
<part>
<date>2026-07</date>
<extent unit="page">
<start>7624</start>
<end>7652</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T HiRAS: A Hierarchical Multi-Agent Framework for Paper-to-Code Generation and Execution
%A Hong, Hanhua
%A LI, Yizhi
%A Chen, Jiaoyan
%A Ananiadou, Sophia
%A Li, Xiaoli
%A Kim, Jung-jae
%A Lin, Chenghua
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F hong-etal-2026-hiras
%X Recent advances in large language models have highlighted their potential to automate computational research, particularly reproducing experimental results. However, existing approaches still use fixed sequential agent pipelines with weak global coordination, which limits their robustness and overall performance. In this work, we propose Hierarchical Research Agent System (HiRAS), a hierarchical multi-agent framework for end-to-end paper reproduction that employs supervisory manager agents to coordinate specialised agents across fine-grained stages. We also identify limitations in the reference-free evaluation of the Paper2Code benchmark and introduce Paper2Code-Extra (P2C-Ex), a refined protocol that incorporates repository-level information and better aligns with the original reference-based metric. We conduct extensive evaluation, validating the effectiveness and robustness of our proposed methods, and observing improvements, including \ensuremath>10% relative performance gain above the previous state-of-the-art using open-source backbone models and significantly reduced hallucination in the evaluation. All code and data will be made publicly available.
%U https://aclanthology.org/2026.findings-acl.377/
%P 7624-7652
Markdown (Informal)
[HiRAS: A Hierarchical Multi-Agent Framework for Paper-to-Code Generation and Execution](https://aclanthology.org/2026.findings-acl.377/) (Hong et al., Findings 2026)
ACL
- Hanhua Hong, Yizhi LI, Jiaoyan Chen, Sophia Ananiadou, Xiaoli Li, Jung-jae Kim, and Chenghua Lin. 2026. HiRAS: A Hierarchical Multi-Agent Framework for Paper-to-Code Generation and Execution. In Findings of the Association for Computational Linguistics: ACL 2026, pages 7624–7652, San Diego, California, United States. Association for Computational Linguistics.