@inproceedings{li-etal-2025-dora,
title = "{DORA}: Dynamic Optimization Prompt for Continuous Reflection of {LLM}-based Agent",
author = "Li, Kun and
Zhao, Tingzhang and
Zhou, Wei and
Hu, Songlin",
editor = "Rambow, Owen and
Wanner, Leo and
Apidianaki, Marianna and
Al-Khalifa, Hend and
Di Eugenio, Barbara and
Schockaert, Steven",
booktitle = "Proceedings of the 31st International Conference on Computational Linguistics",
month = jan,
year = "2025",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.coling-main.504/",
pages = "7546--7557",
abstract = "Autonomous agents powered by large language models (LLMs) hold significant potential across various domains. The Reflection framework is designed to help agents learn from past mistakes in complex tasks. While previous research has shown that reflection can enhance performance, our investigation reveals a key limitation: meaningful self-reflection primarily occurs at the beginning of iterations, with subsequent attempts failing to produce further improvements. We term this phenomenon {\textquotedblleft}Early Stop Reflection,{\textquotedblright} where the reflection process halts prematurely, limiting the agent`s ability to engage in continuous learning. To address this, we propose the DORA method (Dynamic and Optimized Reflection Advice), which generates task-adaptive and diverse reflection advice. DORA introduces an external open-source small language model (SLM) that dynamically generates prompts for the reflection LLM. The SLM uses feedback from the agent and optimizes the prompt generation process through a non-gradient Bayesian Optimization (BO) algorithm, ensuring the reflection process evolves and adapts over time. Our experiments in the MiniWoB++ and Alfworld environments confirm that DORA effectively mitigates the {\textquotedblleft}Early Stop Reflection{\textquotedblright} issue, enabling agents to maintain iterative improvements and boost performance in long-term, complex tasks. Code are available at https://anonymous.4open.science/r/DORA-44FB/."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="li-etal-2025-dora">
<titleInfo>
<title>DORA: Dynamic Optimization Prompt for Continuous Reflection of LLM-based Agent</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kun</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tingzhang</namePart>
<namePart type="family">Zhao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Songlin</namePart>
<namePart type="family">Hu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 31st International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Apidianaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="given">Di</namePart>
<namePart type="family">Eugenio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Schockaert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Autonomous agents powered by large language models (LLMs) hold significant potential across various domains. The Reflection framework is designed to help agents learn from past mistakes in complex tasks. While previous research has shown that reflection can enhance performance, our investigation reveals a key limitation: meaningful self-reflection primarily occurs at the beginning of iterations, with subsequent attempts failing to produce further improvements. We term this phenomenon “Early Stop Reflection,” where the reflection process halts prematurely, limiting the agent‘s ability to engage in continuous learning. To address this, we propose the DORA method (Dynamic and Optimized Reflection Advice), which generates task-adaptive and diverse reflection advice. DORA introduces an external open-source small language model (SLM) that dynamically generates prompts for the reflection LLM. The SLM uses feedback from the agent and optimizes the prompt generation process through a non-gradient Bayesian Optimization (BO) algorithm, ensuring the reflection process evolves and adapts over time. Our experiments in the MiniWoB++ and Alfworld environments confirm that DORA effectively mitigates the “Early Stop Reflection” issue, enabling agents to maintain iterative improvements and boost performance in long-term, complex tasks. Code are available at https://anonymous.4open.science/r/DORA-44FB/.</abstract>
<identifier type="citekey">li-etal-2025-dora</identifier>
<location>
<url>https://aclanthology.org/2025.coling-main.504/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>7546</start>
<end>7557</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DORA: Dynamic Optimization Prompt for Continuous Reflection of LLM-based Agent
%A Li, Kun
%A Zhao, Tingzhang
%A Zhou, Wei
%A Hu, Songlin
%Y Rambow, Owen
%Y Wanner, Leo
%Y Apidianaki, Marianna
%Y Al-Khalifa, Hend
%Y Di Eugenio, Barbara
%Y Schockaert, Steven
%S Proceedings of the 31st International Conference on Computational Linguistics
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F li-etal-2025-dora
%X Autonomous agents powered by large language models (LLMs) hold significant potential across various domains. The Reflection framework is designed to help agents learn from past mistakes in complex tasks. While previous research has shown that reflection can enhance performance, our investigation reveals a key limitation: meaningful self-reflection primarily occurs at the beginning of iterations, with subsequent attempts failing to produce further improvements. We term this phenomenon “Early Stop Reflection,” where the reflection process halts prematurely, limiting the agent’s ability to engage in continuous learning. To address this, we propose the DORA method (Dynamic and Optimized Reflection Advice), which generates task-adaptive and diverse reflection advice. DORA introduces an external open-source small language model (SLM) that dynamically generates prompts for the reflection LLM. The SLM uses feedback from the agent and optimizes the prompt generation process through a non-gradient Bayesian Optimization (BO) algorithm, ensuring the reflection process evolves and adapts over time. Our experiments in the MiniWoB++ and ALFWorld environments confirm that DORA effectively mitigates the “Early Stop Reflection” issue, enabling agents to maintain iterative improvements and boost performance in long-term, complex tasks. Code is available at https://anonymous.4open.science/r/DORA-44FB/.
%U https://aclanthology.org/2025.coling-main.504/
%P 7546-7557
Markdown (Informal)
[DORA: Dynamic Optimization Prompt for Continuous Reflection of LLM-based Agent](https://aclanthology.org/2025.coling-main.504/) (Li et al., COLING 2025)
ACL
Kun Li, Tingzhang Zhao, Wei Zhou, and Songlin Hu. 2025. DORA: Dynamic Optimization Prompt for Continuous Reflection of LLM-based Agent. In Proceedings of the 31st International Conference on Computational Linguistics, pages 7546–7557, Abu Dhabi, UAE. Association for Computational Linguistics.
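
The abstract above outlines DORA's reflection loop: a small language model proposes reflection prompts, the agent's task feedback scores them, and a non-gradient Bayesian Optimization step selects the next prompt. The sketch below illustrates that loop under stated assumptions: `slm_generate_advice`, `run_agent_episode`, and the two-dimensional prompt-control space are hypothetical stand-ins, and `skopt.gp_minimize` serves as a generic non-gradient BO routine, not the paper's actual implementation.

```python
"""Minimal sketch of the reflection loop described in the DORA abstract.

All names below are illustrative assumptions, not the paper's code:
the SLM call, the agent rollout, and the two prompt controls are stubs.
"""
from skopt import gp_minimize  # non-gradient Bayesian Optimization
from skopt.space import Real


def slm_generate_advice(temperature: float, diversity: float) -> str:
    """Hypothetical SLM call: decode reflection advice under two controls."""
    return (f"Reflect on the last failure and propose a concrete fix "
            f"(temp={temperature:.2f}, diversity={diversity:.2f}).")


def run_agent_episode(reflection_prompt: str) -> float:
    """Hypothetical rollout: run the LLM agent with this reflection prompt
    and return a task success score in [0, 1]. Stubbed out here."""
    return 0.0  # replace with a MiniWoB++ / ALFWorld episode


def objective(params) -> float:
    """One reflection round; score is negated because gp_minimize minimizes."""
    temperature, diversity = params
    advice = slm_generate_advice(temperature, diversity)
    return -run_agent_episode(advice)


# Each BO call proposes fresh prompt controls from past agent feedback,
# so the reflection advice keeps changing rather than converging early.
result = gp_minimize(
    objective,
    dimensions=[Real(0.1, 1.5, name="temperature"),
                Real(0.0, 1.0, name="diversity")],
    n_calls=15,
    random_state=0,
)
print("best prompt controls:", result.x, "best score:", -result.fun)
```

Plugging a real SLM and real environment rollouts into the two stubs would let the optimizer keep diversifying reflection advice across iterations, which is the mechanism the abstract credits with mitigating Early Stop Reflection.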