% Kiros, Chan and Hinton: "Picturebook" image-search-grounded word embeddings; ACL 2018 long paper (Volume 1).
@InProceedings{kiros-chan-hinton:2018:Long,
  author    = {Kiros, Jamie and Chan, William and Hinton, Geoffrey},
  title     = {Illustrative Language Understanding: Large-Scale Visual Grounding with Image Search},
  booktitle = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2018},
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  pages     = {922--933},
  doi       = {10.18653/v1/P18-1085},
  url       = {https://aclanthology.org/P18-1085/},
  abstract  = {We introduce Picturebook, a large-scale lookup operation to ground language via `snapshots' of our physical world accessed through image search. For each word in a vocabulary, we extract the top-$k$ images from Google image search and feed the images through a convolutional network to extract a word embedding. We introduce a multimodal gating function to fuse our Picturebook embeddings with other word representations. We also introduce Inverse Picturebook, a mechanism to map a Picturebook embedding back into words. We experiment and report results across a wide range of tasks: word similarity, natural language inference, semantic relatedness, sentiment/topic classification, image-sentence ranking and machine translation. We also show that gate activations corresponding to Picturebook embeddings are highly correlated to human judgments of concreteness ratings.}
}

