@inproceedings{wang-etal-2024-e2cl,
title = "{E}$^2${CL}: Exploration-based Error Correction Learning for Embodied Agents",
author = "Wang, Hanlin and
Leong, Chak Tou and
Wang, Jian and
Li, Wenjie",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-emnlp.448",
pages = "7626--7639",
abstract = "Language models are exhibiting increasing capability in knowledge utilization and reasoning. However, when applied as agents in embodied environments, they often suffer from misalignment between their intrinsic knowledge and environmental knowledge, leading to infeasible actions. Traditional environment alignment methods, such as supervised learning on expert trajectories and reinforcement learning, encounter limitations in covering environmental knowledge and achieving efficient convergence, respectively. Inspired by human learning, we propose Exploration-based Error Correction Learning (E$^2$CL), a novel framework that leverages exploration-induced errors and environmental feedback to enhance environment alignment for embodied agents. E$^2$CL incorporates teacher-guided and teacher-free explorations to gather environmental feedback and correct erroneous actions. The agent learns to provide feedback and self-correct, thereby enhancing its adaptability to target environments. Extensive experiments in the VirtualHome environment demonstrate that E$^2$CL-trained agents outperform those trained by baseline methods and exhibit superior self-correction capabilities.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2024-e2cl">
<titleInfo>
<title>E²CL: Exploration-based Error Correction Learning for Embodied Agents</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hanlin</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chak</namePart>
<namePart type="given">Tou</namePart>
<namePart type="family">Leong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jian</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenjie</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Language models are exhibiting increasing capability in knowledge utilization and reasoning. However, when applied as agents in embodied environments, they often suffer from misalignment between their intrinsic knowledge and environmental knowledge, leading to infeasible actions. Traditional environment alignment methods, such as supervised learning on expert trajectories and reinforcement learning, encounter limitations in covering environmental knowledge and achieving efficient convergence, respectively. Inspired by human learning, we propose Exploration-based Error Correction Learning (E²CL), a novel framework that leverages exploration-induced errors and environmental feedback to enhance environment alignment for embodied agents. E²CL incorporates teacher-guided and teacher-free explorations to gather environmental feedback and correct erroneous actions. The agent learns to provide feedback and self-correct, thereby enhancing its adaptability to target environments. Extensive experiments in the VirtualHome environment demonstrate that E²CL-trained agents outperform those trained by baseline methods and exhibit superior self-correction capabilities.</abstract>
<identifier type="citekey">wang-etal-2024-e2cl</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.448</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>7626</start>
<end>7639</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T E²CL: Exploration-based Error Correction Learning for Embodied Agents
%A Wang, Hanlin
%A Leong, Chak Tou
%A Wang, Jian
%A Li, Wenjie
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F wang-etal-2024-e2cl
%X Language models are exhibiting increasing capability in knowledge utilization and reasoning. However, when applied as agents in embodied environments, they often suffer from misalignment between their intrinsic knowledge and environmental knowledge, leading to infeasible actions. Traditional environment alignment methods, such as supervised learning on expert trajectories and reinforcement learning, encounter limitations in covering environmental knowledge and achieving efficient convergence, respectively. Inspired by human learning, we propose Exploration-based Error Correction Learning (E²CL), a novel framework that leverages exploration-induced errors and environmental feedback to enhance environment alignment for embodied agents. E²CL incorporates teacher-guided and teacher-free explorations to gather environmental feedback and correct erroneous actions. The agent learns to provide feedback and self-correct, thereby enhancing its adaptability to target environments. Extensive experiments in the VirtualHome environment demonstrate that E²CL-trained agents outperform those trained by baseline methods and exhibit superior self-correction capabilities.
%U https://aclanthology.org/2024.findings-emnlp.448
%P 7626-7639
Markdown (Informal)
[E2CL: Exploration-based Error Correction Learning for Embodied Agents](https://aclanthology.org/2024.findings-emnlp.448) (Wang et al., Findings 2024)
ACL