@InProceedings{belinkov-EtAl:2017:Long,
  author    = {Belinkov, Yonatan  and  Durrani, Nadir  and  Dalvi, Fahim  and  Sajjad, Hassan  and  Glass, James},
  title     = {What do Neural Machine Translation Models Learn about Morphology?},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = {July},
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {861--872},
  abstract  = {Neural machine translation (MT) models obtain state-of-the-art performance
               while maintaining a simple, end-to-end architecture. However, little is known
               about what these models learn about source and target languages during the
               training process. In this work, we analyze the representations learned by
               neural MT models at various levels of granularity and empirically evaluate the
               quality of the representations for learning morphology through extrinsic
               part-of-speech and morphological tagging tasks. We conduct a thorough
               investigation along several parameters: word-based vs. character-based
               representations, depth of the encoding layer, the identity of the target
               language, and encoder vs. decoder representations. Our data-driven,
               quantitative evaluation sheds light on important aspects in the neural MT
               system and its ability to capture word structure.},
  url       = {http://aclweb.org/anthology/P17-1080}
}