@comment{Conference paper in the PaM 2020 proceedings; ACL Anthology record 2020.pam-1.7.}
@inproceedings{cano-santin-etal-2020-fast,
  author    = {Cano Sant{\'\i}n, Jos{\'e} Miguel and
               Dobnik, Simon and
               Ghanimifard, Mehdi},
  title     = {Fast visual grounding in interaction: bringing few-shot learning with neural networks to an interactive robot},
  editor    = {Howes, Christine and
               Chatzikyriakidis, Stergios and
               Ek, Adam and
               Somashekarappa, Vidya},
  booktitle = {Proceedings of the Probability and Meaning Conference (PaM 2020)},
  year      = {2020},
  month     = jun,
  address   = {Gothenburg},
  publisher = {Association for Computational Linguistics},
  pages     = {53--61},
  url       = {https://aclanthology.org/2020.pam-1.7},
  abstract  = {The major shortcomings of using neural networks with situated agents are that in incremental interaction very few learning examples are available and that their visual sensory representations are quite different from image caption datasets. In this work we adapt and evaluate a few-shot learning approach, Matching Networks (Vinyals et al., 2016), to conversational strategies of a robot interacting with a human tutor in order to efficiently learn to categorise objects that are presented to it and also investigate to what degree transfer learning from pre-trained models on images from different contexts can improve its performance. We discuss the implications of such learning on the nature of semantic representations the system has learned.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="cano-santin-etal-2020-fast">
    <!-- Title of the paper itself -->
    <titleInfo>
      <title>Fast visual grounding in interaction: bringing few-shot learning with neural networks to an interactive robot</title>
    </titleInfo>
    <!-- Authors, one name element each, in byline order -->
    <name type="personal">
      <namePart type="given">José</namePart>
      <namePart type="given">Miguel</namePart>
      <namePart type="family">Cano Santín</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Simon</namePart>
      <namePart type="family">Dobnik</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mehdi</namePart>
      <namePart type="family">Ghanimifard</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Probability and Meaning Conference (PaM 2020)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Christine</namePart>
        <namePart type="family">Howes</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stergios</namePart>
        <namePart type="family">Chatzikyriakidis</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Adam</namePart>
        <namePart type="family">Ek</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Vidya</namePart>
        <namePart type="family">Somashekarappa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Gothenburg</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The major shortcomings of using neural networks with situated agents are that in incremental interaction very few learning examples are available and that their visual sensory representations are quite different from image caption datasets. In this work we adapt and evaluate a few-shot learning approach, Matching Networks (Vinyals et al., 2016), to conversational strategies of a robot interacting with a human tutor in order to efficiently learn to categorise objects that are presented to it and also investigate to what degree transfer learning from pre-trained models on images from different contexts can improve its performance. We discuss the implications of such learning on the nature of semantic representations the system has learned.</abstract>
    <!-- Citation key and landing-page URL for this record -->
    <identifier type="citekey">cano-santin-etal-2020-fast</identifier>
    <location>
      <url>https://aclanthology.org/2020.pam-1.7</url>
    </location>
    <!-- Issue date and page extent of the paper -->
    <part>
      <date>2020-06</date>
      <extent unit="page">
        <start>53</start>
        <end>61</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Fast visual grounding in interaction: bringing few-shot learning with neural networks to an interactive robot
%A Cano Santín, José Miguel
%A Dobnik, Simon
%A Ghanimifard, Mehdi
%Y Howes, Christine
%Y Chatzikyriakidis, Stergios
%Y Ek, Adam
%Y Somashekarappa, Vidya
%S Proceedings of the Probability and Meaning Conference (PaM 2020)
%D 2020
%8 June
%I Association for Computational Linguistics
%C Gothenburg
%F cano-santin-etal-2020-fast
%X The major shortcomings of using neural networks with situated agents are that in incremental interaction very few learning examples are available and that their visual sensory representations are quite different from image caption datasets. In this work we adapt and evaluate a few-shot learning approach, Matching Networks (Vinyals et al., 2016), to conversational strategies of a robot interacting with a human tutor in order to efficiently learn to categorise objects that are presented to it and also investigate to what degree transfer learning from pre-trained models on images from different contexts can improve its performance. We discuss the implications of such learning on the nature of semantic representations the system has learned.
%U https://aclanthology.org/2020.pam-1.7
%P 53-61
Markdown (Informal)
[Fast visual grounding in interaction: bringing few-shot learning with neural networks to an interactive robot](https://aclanthology.org/2020.pam-1.7) (Cano Santín et al., PaM 2020)
ACL