@inproceedings{fu-wang-2021-semi,
title = "Semi-Supervised Policy Initialization for Playing Games with Language Hints",
author = "Fu, Tsu-Jui and
Wang, William Yang",
editor = "Toutanova, Kristina and
Rumshisky, Anna and
Zettlemoyer, Luke and
Hakkani-Tur, Dilek and
Beltagy, Iz and
Bethard, Steven and
Cotterell, Ryan and
Chakraborty, Tanmoy and
Zhou, Yichao",
booktitle = "Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.naacl-main.249",
doi = "10.18653/v1/2021.naacl-main.249",
pages = "3112--3116",
abstract = "Using natural language as a hint can supply an additional reward for playing sparse-reward games. Achieving a goal should involve several different hints, while the given hints are usually incomplete. Those unmentioned latent hints still rely on the sparse reward signal, and make the learning process difficult. In this paper, we propose semi-supervised initialization (SSI) that allows the agent to learn from various possible hints before training under different tasks. Experiments show that SSI not only helps to learn faster (\textbf{1.2x}) but also has a higher success rate (\textbf{11{\%}} relative improvement) of the final policy.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fu-wang-2021-semi">
<titleInfo>
<title>Semi-Supervised Policy Initialization for Playing Games with Language Hints</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tsu-Jui</namePart>
<namePart type="family">Fu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">William</namePart>
<namePart type="given">Yang</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kristina</namePart>
<namePart type="family">Toutanova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rumshisky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luke</namePart>
<namePart type="family">Zettlemoyer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dilek</namePart>
<namePart type="family">Hakkani-Tur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iz</namePart>
<namePart type="family">Beltagy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Cotterell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yichao</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Using natural language as a hint can supply an additional reward for playing sparse-reward games. Achieving a goal should involve several different hints, while the given hints are usually incomplete. Those unmentioned latent hints still rely on the sparse reward signal, and make the learning process difficult. In this paper, we propose semi-supervised initialization (SSI) that allows the agent to learn from various possible hints before training under different tasks. Experiments show that SSI not only helps to learn faster (1.2x) but also has a higher success rate (11% relative improvement) of the final policy.</abstract>
<identifier type="citekey">fu-wang-2021-semi</identifier>
<identifier type="doi">10.18653/v1/2021.naacl-main.249</identifier>
<location>
<url>https://aclanthology.org/2021.naacl-main.249</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>3112</start>
<end>3116</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Semi-Supervised Policy Initialization for Playing Games with Language Hints
%A Fu, Tsu-Jui
%A Wang, William Yang
%Y Toutanova, Kristina
%Y Rumshisky, Anna
%Y Zettlemoyer, Luke
%Y Hakkani-Tur, Dilek
%Y Beltagy, Iz
%Y Bethard, Steven
%Y Cotterell, Ryan
%Y Chakraborty, Tanmoy
%Y Zhou, Yichao
%S Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F fu-wang-2021-semi
%X Using natural language as a hint can supply an additional reward for playing sparse-reward games. Achieving a goal should involve several different hints, while the given hints are usually incomplete. Those unmentioned latent hints still rely on the sparse reward signal, and make the learning process difficult. In this paper, we propose semi-supervised initialization (SSI) that allows the agent to learn from various possible hints before training under different tasks. Experiments show that SSI not only helps to learn faster (1.2x) but also has a higher success rate (11% relative improvement) of the final policy.
%R 10.18653/v1/2021.naacl-main.249
%U https://aclanthology.org/2021.naacl-main.249
%U https://doi.org/10.18653/v1/2021.naacl-main.249
%P 3112-3116
Markdown (Informal)
[Semi-Supervised Policy Initialization for Playing Games with Language Hints](https://aclanthology.org/2021.naacl-main.249) (Fu & Wang, NAACL 2021)
ACL