@inproceedings{rentschler-roberts-2025-rl,
title = "{RL} + Transformer = A General-Purpose Problem Solver",
author = "Rentschler, Micah and
Roberts, Jesse",
editor = "Kamalloo, Ehsan and
Gontier, Nicolas and
Lu, Xing Han and
Dziri, Nouha and
Murty, Shikhar and
Lacoste, Alexandre",
booktitle = "Proceedings of the 1st Workshop for Research on Agent Language Models (REALM 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.realm-1.29/",
doi = "10.18653/v1/2025.realm-1.29",
pages = "401--410",
ISBN = "979-8-89176-264-0",
abstract = "What if artificial intelligence could not only solve problems for which it was trained but also teach itself to tackle novel tasks? In this paper, we finetune Llama 3.1 using reinforcement learning on the grid-world game Frozen Lake and investigate its ability to solve maps it has never encountered{---}a phenomenon recently termed In-Context Reinforcement Learning (ICRL). Without additional training, the transformer demonstrates the capacity to adapt to both in-distribution and out-of-distribution environment parameterizations. Moreover, it remains effective when trained on data that blends optimal and suboptimal behavior, combines strategies from its context (behavior-stitching), and dynamically adapts to non-stationary environments. These proof-of-concept findings suggest that in-context learning via reinforcement-tuned transformers may form the basis of a promising general-purpose problem-solver."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rentschler-roberts-2025-rl">
<titleInfo>
<title>RL + Transformer = A General-Purpose Problem Solver</title>
</titleInfo>
<name type="personal">
<namePart type="given">Micah</namePart>
<namePart type="family">Rentschler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jesse</namePart>
<namePart type="family">Roberts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st Workshop for Research on Agent Language Models (REALM 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ehsan</namePart>
<namePart type="family">Kamalloo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicolas</namePart>
<namePart type="family">Gontier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xing</namePart>
<namePart type="given">Han</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nouha</namePart>
<namePart type="family">Dziri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shikhar</namePart>
<namePart type="family">Murty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexandre</namePart>
<namePart type="family">Lacoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-264-0</identifier>
</relatedItem>
<abstract>What if artificial intelligence could not only solve problems for which it was trained but also teach itself to tackle novel tasks? In this paper, we finetune Llama 3.1 using reinforcement learning on the grid-world game Frozen Lake and investigate its ability to solve maps it has never encountered—a phenomenon recently termed In-Context Reinforcement Learning (ICRL). Without additional training, the transformer demonstrates the capacity to adapt to both in-distribution and out-of-distribution environment parameterizations. Moreover, it remains effective when trained on data that blends optimal and suboptimal behavior, combines strategies from its context (behavior-stitching), and dynamically adapts to non-stationary environments. These proof-of-concept findings suggest that in-context learning via reinforcement-tuned transformers may form the basis of a promising general-purpose problem-solver.</abstract>
<identifier type="citekey">rentschler-roberts-2025-rl</identifier>
<identifier type="doi">10.18653/v1/2025.realm-1.29</identifier>
<location>
<url>https://aclanthology.org/2025.realm-1.29/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>401</start>
<end>410</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T RL + Transformer = A General-Purpose Problem Solver
%A Rentschler, Micah
%A Roberts, Jesse
%Y Kamalloo, Ehsan
%Y Gontier, Nicolas
%Y Lu, Xing Han
%Y Dziri, Nouha
%Y Murty, Shikhar
%Y Lacoste, Alexandre
%S Proceedings of the 1st Workshop for Research on Agent Language Models (REALM 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-264-0
%F rentschler-roberts-2025-rl
%X What if artificial intelligence could not only solve problems for which it was trained but also teach itself to tackle novel tasks? In this paper, we finetune Llama 3.1 using reinforcement learning on the grid-world game Frozen Lake and investigate its ability to solve maps it has never encountered—a phenomenon recently termed In-Context Reinforcement Learning (ICRL). Without additional training, the transformer demonstrates the capacity to adapt to both in-distribution and out-of-distribution environment parameterizations. Moreover, it remains effective when trained on data that blends optimal and suboptimal behavior, combines strategies from its context (behavior-stitching), and dynamically adapts to non-stationary environments. These proof-of-concept findings suggest that in-context learning via reinforcement-tuned transformers may form the basis of a promising general-purpose problem-solver.
%R 10.18653/v1/2025.realm-1.29
%U https://aclanthology.org/2025.realm-1.29/
%U https://doi.org/10.18653/v1/2025.realm-1.29
%P 401-410
Markdown (Informal)
[RL + Transformer = A General-Purpose Problem Solver](https://aclanthology.org/2025.realm-1.29/) (Rentschler & Roberts, REALM 2025)
ACL
- Micah Rentschler and Jesse Roberts. 2025. RL + Transformer = A General-Purpose Problem Solver. In Proceedings of the 1st Workshop for Research on Agent Language Models (REALM 2025), pages 401–410, Vienna, Austria. Association for Computational Linguistics.