@inproceedings{vaidyanathan-EtAl:2018:Short,
  author    = {Vaidyanathan, Preethi and Prud'hommeaux, Emily T. and Pelz, Jeff B. and Alm, Cecilia O.},
  title     = {{SNAG}: Spoken Narratives and Gaze Dataset},
  booktitle = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  month     = jul,
  year      = {2018},
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  pages     = {132--137},
  abstract  = {Humans rely on multiple sensory modalities when examining and reasoning over images. In this paper, we describe a new multimodal dataset that consists of gaze measurements and spoken descriptions collected in parallel during an image inspection task. The task was performed by multiple participants on 100 general-domain images showing everyday objects and activities. We demonstrate the usefulness of the dataset by applying an existing visual-linguistic data fusion framework in order to label important image regions with appropriate linguistic labels.},
  doi       = {10.18653/v1/P18-2022},
  url       = {http://www.aclweb.org/anthology/P18-2022},
}

