% Conference paper, ACL Anthology ID I17-1001 (see url field).
% Accents are written as BibTeX special characters ({\`a}, {\'\i}); month uses the standard macro.
@InProceedings{belinkov-EtAl:2017:I17-1,
  author    = {Belinkov, Yonatan and M{\`a}rquez, Llu{\'\i}s and Sajjad, Hassan and Durrani, Nadir and Dalvi, Fahim and Glass, James},
  title     = {Evaluating Layers of Representation in Neural Machine Translation on Part-of-Speech and Semantic Tagging Tasks},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  month     = nov,
  year      = {2017},
  address   = {Taipei, Taiwan},
  publisher = {Asian Federation of Natural Language Processing},
  pages     = {1--10},
  abstract  = {While neural machine translation (NMT) models provide improved translation
               quality in an elegant framework, it is less clear what they learn about
               language. Recent work has started evaluating the quality of vector
               representations learned by NMT models on morphological and syntactic tasks. In
               this paper, we investigate the representations learned at different layers of
               NMT encoders. We train NMT systems on parallel data and use the models to
               extract features for training a classifier on two tasks: part-of-speech and
               semantic tagging. We then measure the performance of the classifier as a proxy
               to the quality of the original NMT model for the given task. Our quantitative
               analysis yields interesting insights regarding representation learning in NMT
               models. For instance, we find that higher layers are better at learning
               semantics while lower layers tend to be better for part-of-speech tagging. We
               also observe little effect of the target language on source-side
               representations, especially in higher quality models.},
  url       = {http://www.aclweb.org/anthology/I17-1001}
}

