@InProceedings{zhang-bowman:2018:BlackboxNLP,
  author    = {Zhang, Kelly  and  Bowman, Samuel},
  title     = {Language Modeling Teaches You More than Translation Does: Lessons Learned Through Auxiliary Syntactic Task Analysis},
  booktitle = {Proceedings of the 2018 EMNLP Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP},
  month     = {November},
  year      = {2018},
  address   = {Brussels, Belgium},
  publisher = {Association for Computational Linguistics},
  pages     = {359--361},
  abstract  = {Recently, researchers have found that deep LSTMs trained on tasks like machine translation learn substantial syntactic and semantic information about their input sentences, including part-of-speech. These findings begin to shed light on why pretrained representations, like ELMo and CoVe, are so beneficial for neural language understanding models. We still, though, do not yet have a clear understanding of how the choice of pretraining objective affects the type of linguistic information that models learn. With this in mind, we compare four objectives---language modeling, translation, skip-thought, and autoencoding---on their ability to induce syntactic and part-of-speech information, holding constant the quantity and genre of the training data, as well as the LSTM architecture.},
  url       = {http://www.aclweb.org/anthology/W18-5448}
}