@inproceedings{sadler-etal-2023-yes,
title = "Yes, this Way! Learning to Ground Referring Expressions into Actions with Intra-episodic Feedback from Supportive Teachers",
author = "Sadler, Philipp and
Hakimov, Sherzod and
Schlangen, David",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.587",
doi = "10.18653/v1/2023.findings-acl.587",
pages = "9228--9239",
abstract = "The ability to pick up on language signals in an ongoing interaction is crucial for future machine learning models to collaborate and interact with humans naturally. In this paper, we present an initial study that evaluates intra-episodic feedback given in a collaborative setting. We use a referential language game as a controllable example of a task-oriented collaborative joint activity. A teacher utters a referring expression generated by a well-known symbolic algorithm (the {``}Incremental Algorithm{''}) as an initial instruction and then monitors the follower{'}s actions to possibly intervene with intra-episodic feedback (which does not explicitly have to be requested). We frame this task as a reinforcement learning problem with sparse rewards and learn a follower policy for a heuristic teacher. Our results show that intra-episodic feedback allows the follower to generalize on aspects of scene complexity and performs better than providing only the initial statement.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sadler-etal-2023-yes">
<titleInfo>
<title>Yes, this Way! Learning to Ground Referring Expressions into Actions with Intra-episodic Feedback from Supportive Teachers</title>
</titleInfo>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Sadler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sherzod</namePart>
<namePart type="family">Hakimov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Schlangen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The ability to pick up on language signals in an ongoing interaction is crucial for future machine learning models to collaborate and interact with humans naturally. In this paper, we present an initial study that evaluates intra-episodic feedback given in a collaborative setting. We use a referential language game as a controllable example of a task-oriented collaborative joint activity. A teacher utters a referring expression generated by a well-known symbolic algorithm (the “Incremental Algorithm”) as an initial instruction and then monitors the follower’s actions to possibly intervene with intra-episodic feedback (which does not explicitly have to be requested). We frame this task as a reinforcement learning problem with sparse rewards and learn a follower policy for a heuristic teacher. Our results show that intra-episodic feedback allows the follower to generalize on aspects of scene complexity and performs better than providing only the initial statement.</abstract>
<identifier type="citekey">sadler-etal-2023-yes</identifier>
<identifier type="doi">10.18653/v1/2023.findings-acl.587</identifier>
<location>
<url>https://aclanthology.org/2023.findings-acl.587</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>9228</start>
<end>9239</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Yes, this Way! Learning to Ground Referring Expressions into Actions with Intra-episodic Feedback from Supportive Teachers
%A Sadler, Philipp
%A Hakimov, Sherzod
%A Schlangen, David
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F sadler-etal-2023-yes
%X The ability to pick up on language signals in an ongoing interaction is crucial for future machine learning models to collaborate and interact with humans naturally. In this paper, we present an initial study that evaluates intra-episodic feedback given in a collaborative setting. We use a referential language game as a controllable example of a task-oriented collaborative joint activity. A teacher utters a referring expression generated by a well-known symbolic algorithm (the “Incremental Algorithm”) as an initial instruction and then monitors the follower’s actions to possibly intervene with intra-episodic feedback (which does not explicitly have to be requested). We frame this task as a reinforcement learning problem with sparse rewards and learn a follower policy for a heuristic teacher. Our results show that intra-episodic feedback allows the follower to generalize on aspects of scene complexity and performs better than providing only the initial statement.
%R 10.18653/v1/2023.findings-acl.587
%U https://aclanthology.org/2023.findings-acl.587
%U https://doi.org/10.18653/v1/2023.findings-acl.587
%P 9228-9239
Markdown (Informal)
[Yes, this Way! Learning to Ground Referring Expressions into Actions with Intra-episodic Feedback from Supportive Teachers](https://aclanthology.org/2023.findings-acl.587) (Sadler et al., Findings 2023)
ACL