@InProceedings{hristov-EtAl:2017:RoboNLP,
  author    = {Hristov, Yordan  and  Penkov, Svetlin  and  Lascarides, Alex  and  Ramamoorthy, Subramanian},
  title     = {Grounding Symbols in Multi-Modal Instructions},
  booktitle = {Proceedings of the First Workshop on Language Grounding for Robotics},
  month     = {August},
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {49--57},
  abstract  = {As robots begin to cohabit with humans in semi-structured environments, the
	need arises to understand instructions involving rich variability---for
	instance, learning to ground symbols in the physical world. Realistically, this
	task must cope with small datasets consisting of a particular user's contextual
	assignment of meaning to terms. We present a method for processing a raw stream
	of cross-modal input---i.e., linguistic instructions, visual perception of a
	scene and a concurrent trace of 3D eye tracking fixations---to produce the
	segmentation of objects with a corresponding association to high-level
	concepts. To test our framework we present experiments in a table-top object
	manipulation scenario. Our results show that our model learns the user's notion of
	colour and shape from a small number of physical demonstrations, generalising
	to identifying physical referents for novel combinations of the words.},
  url       = {http://www.aclweb.org/anthology/W17-2807}
}

