@inproceedings{sadler-etal-2024-learning,
title = "Learning Communication Policies for Different Follower Behaviors in a Collaborative Reference Game",
author = "Sadler, Philipp and
Hakimov, Sherzod and
Schlangen, David",
editor = "Kordjamshidi, Parisa and
Wang, Xin Eric and
Zhang, Yue and
Ma, Ziqiao and
Inan, Mert",
booktitle = "Proceedings of the 4th Workshop on Spatial Language Understanding and Grounded Communication for Robotics (SpLU-RoboNLP 2024)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.splurobonlp-1.2",
pages = "17--29",
abstract = "In this work, we evaluate the adaptability of neural agents to assumed partner behaviors in a collaborative reference game. In this game, success is achieved when a knowledgeable guide can verbally lead a follower to the selection of a specific puzzle piece among several distractors. We frame this language grounding and coordination task as a reinforcement learning problem and measure to what extent a common reinforcement learning algorithm (PPO) is able to produce neural agents (the guides) that perform well with various heuristic follower behaviors that vary along the dimensions of confidence and autonomy. We experiment with a learning signal that, in addition to the goal condition, also respects an assumed communicative effort. Our results indicate that this novel ingredient leads to communicative strategies that are less verbose (staying silent in some of the steps) and that, in this respect, the guide{'}s strategies indeed adapt to the partner{'}s level of confidence and autonomy.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="sadler-etal-2024-learning">
    <titleInfo>
      <title>Learning Communication Policies for Different Follower Behaviors in a Collaborative Reference Game</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Philipp</namePart>
      <namePart type="family">Sadler</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sherzod</namePart>
      <namePart type="family">Hakimov</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">David</namePart>
      <namePart type="family">Schlangen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 4th Workshop on Spatial Language Understanding and Grounded Communication for Robotics (SpLU-RoboNLP 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Parisa</namePart>
        <namePart type="family">Kordjamshidi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Xin</namePart>
        <namePart type="given">Eric</namePart>
        <namePart type="family">Wang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yue</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ziqiao</namePart>
        <namePart type="family">Ma</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mert</namePart>
        <namePart type="family">Inan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Bangkok, Thailand</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this work, we evaluate the adaptability of neural agents to assumed partner behaviors in a collaborative reference game. In this game, success is achieved when a knowledgeable guide can verbally lead a follower to the selection of a specific puzzle piece among several distractors. We frame this language grounding and coordination task as a reinforcement learning problem and measure to what extent a common reinforcement learning algorithm (PPO) is able to produce neural agents (the guides) that perform well with various heuristic follower behaviors that vary along the dimensions of confidence and autonomy. We experiment with a learning signal that, in addition to the goal condition, also respects an assumed communicative effort. Our results indicate that this novel ingredient leads to communicative strategies that are less verbose (staying silent in some of the steps) and that, in this respect, the guide’s strategies indeed adapt to the partner’s level of confidence and autonomy.</abstract>
    <identifier type="citekey">sadler-etal-2024-learning</identifier>
    <location>
      <url>https://aclanthology.org/2024.splurobonlp-1.2</url>
    </location>
    <part>
      <date>2024-08</date>
      <extent unit="page">
        <start>17</start>
        <end>29</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Learning Communication Policies for Different Follower Behaviors in a Collaborative Reference Game
%A Sadler, Philipp
%A Hakimov, Sherzod
%A Schlangen, David
%Y Kordjamshidi, Parisa
%Y Wang, Xin Eric
%Y Zhang, Yue
%Y Ma, Ziqiao
%Y Inan, Mert
%S Proceedings of the 4th Workshop on Spatial Language Understanding and Grounded Communication for Robotics (SpLU-RoboNLP 2024)
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F sadler-etal-2024-learning
%X In this work, we evaluate the adaptability of neural agents to assumed partner behaviors in a collaborative reference game. In this game, success is achieved when a knowledgeable guide can verbally lead a follower to the selection of a specific puzzle piece among several distractors. We frame this language grounding and coordination task as a reinforcement learning problem and measure to what extent a common reinforcement learning algorithm (PPO) is able to produce neural agents (the guides) that perform well with various heuristic follower behaviors that vary along the dimensions of confidence and autonomy. We experiment with a learning signal that, in addition to the goal condition, also respects an assumed communicative effort. Our results indicate that this novel ingredient leads to communicative strategies that are less verbose (staying silent in some of the steps) and that, in this respect, the guide’s strategies indeed adapt to the partner’s level of confidence and autonomy.
%U https://aclanthology.org/2024.splurobonlp-1.2
%P 17-29
Markdown (Informal)
[Learning Communication Policies for Different Follower Behaviors in a Collaborative Reference Game](https://aclanthology.org/2024.splurobonlp-1.2) (Sadler et al., splurobonlp-WS 2024)
ACL
Philipp Sadler, Sherzod Hakimov, and David Schlangen. 2024. [Learning Communication Policies for Different Follower Behaviors in a Collaborative Reference Game](https://aclanthology.org/2024.splurobonlp-1.2). In *Proceedings of the 4th Workshop on Spatial Language Understanding and Grounded Communication for Robotics (SpLU-RoboNLP 2024)*, pages 17–29, Bangkok, Thailand. Association for Computational Linguistics.