% Workshop paper by Kuhnle and Copestake (ACL Anthology ID W18-1003, per the url field).
% The month is given as the predefined BibTeX macro (jun) rather than a literal string,
% so the bibliography style decides how the month is abbreviated or localised.
@InProceedings{kuhnle-copestake:2018:W18-10,
  author    = {Kuhnle, Alexander and Copestake, Ann},
  title     = {Deep learning evaluation using deep linguistic processing},
  booktitle = {Proceedings of the Workshop on Generalization in the Age of Deep Learning},
  month     = jun,
  year      = {2018},
  address   = {New Orleans, Louisiana},
  publisher = {Association for Computational Linguistics},
  pages     = {17--23},
  abstract  = {We discuss problems with the standard approaches to evaluation for tasks like visual question answering, and argue that artificial data can be used to address these as a complement to current practice. We demonstrate that with the help of existing `deep' linguistic processing technology we are able to create challenging abstract datasets, which enable us to investigate the language understanding abilities of multimodal deep learning models in detail, as compared to a single performance value on a static and monolithic dataset.},
  url       = {http://www.aclweb.org/anthology/W18-1003}
}