@Book{W18-10:2018,
  editor    = {Yonatan Bisk  and  Omer Levy  and  Mark Yatskar},
  title     = {Proceedings of the Workshop on Generalization in the Age of Deep Learning},
  month     = {June},
  year      = {2018},
  address   = {New Orleans, Louisiana},
  publisher = {Association for Computational Linguistics},
  url       = {http://www.aclweb.org/anthology/W18-10}
}

@InProceedings{wadhwa-EtAl:2018:W18-10,
  author    = {Wadhwa, Soumya  and  Embar, Varsha  and  Grabmair, Matthias  and  Nyberg, Eric},
  title     = {Towards Inference-Oriented Reading Comprehension: ParallelQA},
  booktitle = {Proceedings of the Workshop on Generalization in the Age of Deep Learning},
  month     = {June},
  year      = {2018},
  address   = {New Orleans, Louisiana},
  publisher = {Association for Computational Linguistics},
  pages     = {1--7},
  abstract  = {In this paper, we investigate the tendency of end-to-end neural Machine Reading Comprehension (MRC) models to match shallow patterns rather than perform inference-oriented reasoning on RC benchmarks. We aim to test the ability of these systems to answer questions which focus on referential inference. We propose ParallelQA, a strategy to formulate such questions using parallel passages. We also demonstrate that existing neural models fail to generalize well to this setting.},
  url       = {http://www.aclweb.org/anthology/W18-1001}
}

@InProceedings{jastrzebski-EtAl:2018:W18-10,
  author    = {Jastrzebski, Stanislaw  and  Bahdanau, Dzmitry  and  Hosseini, Seyedarian  and  Noukhovitch, Michael  and  Bengio, Yoshua  and  Cheung, Jackie Chi Kit},
  title     = {Commonsense mining as knowledge base completion? A study on the impact of novelty},
  booktitle = {Proceedings of the Workshop on Generalization in the Age of Deep Learning},
  month     = {June},
  year      = {2018},
  address   = {New Orleans, Louisiana},
  publisher = {Association for Computational Linguistics},
  pages     = {8--16},
  abstract  = {Commonsense knowledge bases such as ConceptNet represent knowledge in the form of relational triples. Inspired by recent work by Li et al., we analyse whether knowledge base completion models can be used to mine commonsense knowledge from raw text. We propose novelty of predicted triples with respect to the training set as an important factor in interpreting results. We critically analyse the difficulty of mining novel commonsense knowledge, and show that a simple baseline method outperforms the previous state of the art on predicting more novel triples.},
  url       = {http://www.aclweb.org/anthology/W18-1002}
}

@InProceedings{kuhnle-copestake:2018:W18-10,
  author    = {Kuhnle, Alexander  and  Copestake, Ann},
  title     = {Deep learning evaluation using deep linguistic processing},
  booktitle = {Proceedings of the Workshop on Generalization in the Age of Deep Learning},
  month     = {June},
  year      = {2018},
  address   = {New Orleans, Louisiana},
  publisher = {Association for Computational Linguistics},
  pages     = {17--23},
  abstract  = {We discuss problems with the standard approaches to evaluation for tasks like visual question answering, and argue that artificial data can be used to address these as a complement to current practice. We demonstrate that with the help of existing `deep' linguistic processing technology we are able to create challenging abstract datasets, which enable us to investigate the language understanding abilities of multimodal deep learning models in detail, as compared to a single performance value on a static and monolithic dataset.},
  url       = {http://www.aclweb.org/anthology/W18-1003}
}

@InProceedings{weber-shekhar-balasubramanian:2018:W18-10,
  author    = {Weber, Noah  and  Shekhar, Leena  and  Balasubramanian, Niranjan},
  title     = {The Fine Line between Linguistic Generalization and Failure in Seq2Seq-Attention Models},
  booktitle = {Proceedings of the Workshop on Generalization in the Age of Deep Learning},
  month     = {June},
  year      = {2018},
  address   = {New Orleans, Louisiana},
  publisher = {Association for Computational Linguistics},
  pages     = {24--27},
  abstract  = {Seq2Seq-based neural architectures have become the go-to architectures for sequence-to-sequence language tasks. Despite their excellent performance on these tasks, recent work has noted that these models typically do not fully capture the linguistic structure required to generalize beyond the dense sections of the data distribution \cite{ettinger2017towards}, and as such, are likely to fail on examples from the tail end of the distribution (such as inputs that are noisy \citep{belkinovnmtbreak}, or of different length \citep{bentivoglinmtlength}). In this paper we look at a model's ability to generalize on a simple symbol rewriting task with a clearly defined structure. We find that the model's ability to generalize this structure beyond the training distribution depends greatly on the chosen random seed, even when performance on the test set remains the same. This finding suggests that a model's ability to capture generalizable structure is highly sensitive; moreover, this sensitivity may not be apparent when evaluating the model on standard test sets.},
  url       = {http://www.aclweb.org/anthology/W18-1004}
}

@InProceedings{mitchell-EtAl:2018:W18-10,
  author    = {Mitchell, Jeff  and  Stenetorp, Pontus  and  Minervini, Pasquale  and  Riedel, Sebastian},
  title     = {Extrapolation in NLP},
  booktitle = {Proceedings of the Workshop on Generalization in the Age of Deep Learning},
  month     = {June},
  year      = {2018},
  address   = {New Orleans, Louisiana},
  publisher = {Association for Computational Linguistics},
  pages     = {28--33},
  abstract  = {We argue that extrapolation to unseen data will often be easier for models that capture global structures, rather than just maximise their local fit to the training data. We show that this is true for two popular models: the Decomposable Attention Model and word2vec.},
  url       = {http://www.aclweb.org/anthology/W18-1005}
}

