% Workshop paper exported from the ACL Anthology (record 2021.metanlp-1.1).
@inproceedings{zhao-etal-2021-meta,
  author    = {Zhao, Zhenjie and
               Sun, Mingfei and
               Ma, Xiaojuan},
  title     = {Meta-Reinforcement Learning for Mastering Multiple Skills and Generalizing across Environments in Text-based Games},
  editor    = {Lee, Hung-Yi and
               Mohtarami, Mitra and
               Li, Shang-Wen and
               Jin, Di and
               Korpusik, Mandy and
               Dong, Shuyan and
               Vu, Ngoc Thang and
               Hakkani-Tur, Dilek},
  booktitle = {Proceedings of the 1st Workshop on Meta Learning and Its Applications to Natural Language Processing},
  month     = aug,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  pages     = {1--10},
  doi       = {10.18653/v1/2021.metanlp-1.1},
  url       = {https://aclanthology.org/2021.metanlp-1.1/},
  abstract  = {Text-based games can be used to develop task-oriented text agents for accomplishing tasks with high-level language instructions, which has potential applications in domains such as human-robot interaction. Given a text instruction, reinforcement learning is commonly used to train agents to complete the intended task owing to its convenience of learning policies automatically. However, because of the large space of combinatorial text actions, learning a policy network that generates an action word by word with reinforcement learning is challenging. Recent research works show that imitation learning provides an effective way of training a generation-based policy network. However, trained agents with imitation learning are hard to master a wide spectrum of task types or skills, and it is also difficult for them to generalize to new environments. In this paper, we propose a meta reinforcement learning based method to train text agents through learning-to-explore. In particular, the text agent first explores the environment to gather task-specific information and then adapts the execution policy for solving the task with this information. On the publicly available testbed ALFWorld, we conducted a comparison study with imitation learning and show the superiority of our method.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for a single paper: authors at the top level, the host
       proceedings (with its editors and publisher) under relatedItem. -->
  <mods ID="zhao-etal-2021-meta">
    <titleInfo>
      <title>Meta-Reinforcement Learning for Mastering Multiple Skills and Generalizing across Environments in Text-based Games</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Zhenjie</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mingfei</namePart>
      <namePart type="family">Sun</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xiaojuan</namePart>
      <namePart type="family">Ma</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 1st Workshop on Meta Learning and Its Applications to Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Hung-Yi</namePart>
        <namePart type="family">Lee</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mitra</namePart>
        <namePart type="family">Mohtarami</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shang-Wen</namePart>
        <namePart type="family">Li</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Di</namePart>
        <namePart type="family">Jin</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mandy</namePart>
        <namePart type="family">Korpusik</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shuyan</namePart>
        <namePart type="family">Dong</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ngoc</namePart>
        <namePart type="given">Thang</namePart>
        <namePart type="family">Vu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Dilek</namePart>
        <namePart type="family">Hakkani-Tur</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Text-based games can be used to develop task-oriented text agents for accomplishing tasks with high-level language instructions, which has potential applications in domains such as human-robot interaction. Given a text instruction, reinforcement learning is commonly used to train agents to complete the intended task owing to its convenience of learning policies automatically. However, because of the large space of combinatorial text actions, learning a policy network that generates an action word by word with reinforcement learning is challenging. Recent research works show that imitation learning provides an effective way of training a generation-based policy network. However, trained agents with imitation learning are hard to master a wide spectrum of task types or skills, and it is also difficult for them to generalize to new environments. In this paper, we propose a meta reinforcement learning based method to train text agents through learning-to-explore. In particular, the text agent first explores the environment to gather task-specific information and then adapts the execution policy for solving the task with this information. On the publicly available testbed ALFWorld, we conducted a comparison study with imitation learning and show the superiority of our method.</abstract>
    <identifier type="citekey">zhao-etal-2021-meta</identifier>
    <identifier type="doi">10.18653/v1/2021.metanlp-1.1</identifier>
    <location>
      <url>https://aclanthology.org/2021.metanlp-1.1/</url>
    </location>
    <part>
      <date>2021-08</date>
      <extent unit="page">
        <start>1</start>
        <end>10</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Meta-Reinforcement Learning for Mastering Multiple Skills and Generalizing across Environments in Text-based Games
%A Zhao, Zhenjie
%A Sun, Mingfei
%A Ma, Xiaojuan
%Y Lee, Hung-Yi
%Y Mohtarami, Mitra
%Y Li, Shang-Wen
%Y Jin, Di
%Y Korpusik, Mandy
%Y Dong, Shuyan
%Y Vu, Ngoc Thang
%Y Hakkani-Tur, Dilek
%S Proceedings of the 1st Workshop on Meta Learning and Its Applications to Natural Language Processing
%D 2021
%8 August
%I Association for Computational Linguistics
%C Online
%F zhao-etal-2021-meta
%X Text-based games can be used to develop task-oriented text agents for accomplishing tasks with high-level language instructions, which has potential applications in domains such as human-robot interaction. Given a text instruction, reinforcement learning is commonly used to train agents to complete the intended task owing to its convenience of learning policies automatically. However, because of the large space of combinatorial text actions, learning a policy network that generates an action word by word with reinforcement learning is challenging. Recent research works show that imitation learning provides an effective way of training a generation-based policy network. However, trained agents with imitation learning are hard to master a wide spectrum of task types or skills, and it is also difficult for them to generalize to new environments. In this paper, we propose a meta reinforcement learning based method to train text agents through learning-to-explore. In particular, the text agent first explores the environment to gather task-specific information and then adapts the execution policy for solving the task with this information. On the publicly available testbed ALFWorld, we conducted a comparison study with imitation learning and show the superiority of our method.
%R 10.18653/v1/2021.metanlp-1.1
%U https://aclanthology.org/2021.metanlp-1.1/
%U https://doi.org/10.18653/v1/2021.metanlp-1.1
%P 1-10
Markdown (Informal)
[Meta-Reinforcement Learning for Mastering Multiple Skills and Generalizing across Environments in Text-based Games](https://aclanthology.org/2021.metanlp-1.1/) (Zhao et al., MetaNLP 2021)
ACL