@inproceedings{wang-jansen-2023-self,
title = "Self-Supervised Behavior Cloned Transformers are Path Crawlers for Text Games",
author = "Wang, Ruoyao and
Jansen, Peter",
editor = "Bouamor, Houda and
Pino, Juan and
Bali, Kalika",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2023",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-emnlp.369",
doi = "10.18653/v1/2023.findings-emnlp.369",
pages = "5555--5565",
abstract = "In this work, we introduce a self-supervised behavior cloning transformer for text games, which are challenging benchmarks for multi-step reasoning in virtual environments. Traditionally, Behavior Cloning Transformers excel in such tasks but rely on supervised training data. Our approach auto-generates training data by exploring trajectories (defined by common macro-action sequences) that lead to reward within the games, while determining the generality and utility of these trajectories by rapidly training small models then evalauating their performance on unseen development games. Through empirical analysis, we show our method consistently uncovers generalizable training data, achieving about 90{\%} performance of supervised systems across three benchmark text games.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-jansen-2023-self">
<titleInfo>
<title>Self-Supervised Behavior Cloned Transformers are Path Crawlers for Text Games</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruoyao</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Jansen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan</namePart>
<namePart type="family">Pino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this work, we introduce a self-supervised behavior cloning transformer for text games, which are challenging benchmarks for multi-step reasoning in virtual environments. Traditionally, Behavior Cloning Transformers excel in such tasks but rely on supervised training data. Our approach auto-generates training data by exploring trajectories (defined by common macro-action sequences) that lead to reward within the games, while determining the generality and utility of these trajectories by rapidly training small models then evalauating their performance on unseen development games. Through empirical analysis, we show our method consistently uncovers generalizable training data, achieving about 90% performance of supervised systems across three benchmark text games.</abstract>
<identifier type="citekey">wang-jansen-2023-self</identifier>
<identifier type="doi">10.18653/v1/2023.findings-emnlp.369</identifier>
<location>
<url>https://aclanthology.org/2023.findings-emnlp.369</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>5555</start>
<end>5565</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Self-Supervised Behavior Cloned Transformers are Path Crawlers for Text Games
%A Wang, Ruoyao
%A Jansen, Peter
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Findings of the Association for Computational Linguistics: EMNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F wang-jansen-2023-self
%X In this work, we introduce a self-supervised behavior cloning transformer for text games, which are challenging benchmarks for multi-step reasoning in virtual environments. Traditionally, Behavior Cloning Transformers excel in such tasks but rely on supervised training data. Our approach auto-generates training data by exploring trajectories (defined by common macro-action sequences) that lead to reward within the games, while determining the generality and utility of these trajectories by rapidly training small models then evalauating their performance on unseen development games. Through empirical analysis, we show our method consistently uncovers generalizable training data, achieving about 90% performance of supervised systems across three benchmark text games.
%R 10.18653/v1/2023.findings-emnlp.369
%U https://aclanthology.org/2023.findings-emnlp.369
%U https://doi.org/10.18653/v1/2023.findings-emnlp.369
%P 5555-5565
Markdown (Informal)
[Self-Supervised Behavior Cloned Transformers are Path Crawlers for Text Games](https://aclanthology.org/2023.findings-emnlp.369) (Wang & Jansen, Findings 2023)
ACL