@InProceedings{mino-EtAl:2017:I17-2,
  author    = {Mino, Hideya  and  Utiyama, Masao  and  Sumita, Eiichiro  and  Tokunaga, Takenobu},
  title     = {Key-value Attention Mechanism for Neural Machine Translation},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 2: Short Papers)},
  month     = {November},
  year      = {2017},
  address   = {Taipei, Taiwan},
  publisher = {Asian Federation of Natural Language Processing},
  pages     = {290--295},
  abstract  = {In this paper, we propose a neural machine translation (NMT) model with a
	key-value attention mechanism on the source-side encoder. The key-value
	attention mechanism separates the source-side content vector into two types of
	memory known as the key and the value. The key is used for calculating the
	attention distribution, and the value is used for encoding the context
	representation. Experiments on three different tasks indicate that our model
	outperforms an NMT model with a conventional attention mechanism. Furthermore,
	we perform experiments with a conventional NMT framework in which a part of the
	initial value of a weight matrix is set to zero, so that the matrix has the
	same initial state as the key-value attention mechanism. As a result, we obtain
	results comparable to those of the key-value attention mechanism without
	changing the network structure.},
  url       = {http://www.aclweb.org/anthology/I17-2049}
}
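
A minimal NumPy sketch of the key-value split described in the abstract: each
source-side encoder state is divided into a key half that scores the attention
distribution and a value half that forms the context vector. The even split,
the dot-product scoring, and all names here are illustrative assumptions, not
the paper's exact formulation (the paper may, for instance, use additive
scoring). BibTeX ignores text outside entries, so this note does not affect
parsing of the entry above.

import numpy as np

def key_value_attention(encoder_states, decoder_state):
    """Key-value attention over source-side encoder states.

    encoder_states: (src_len, 2 * d) array; each state is split into a
                    key half and a value half (assumed even split).
    decoder_state:  (d,) array; the current decoder state acts as the query.
    Returns the context vector and the attention distribution.
    """
    d = decoder_state.shape[0]
    keys = encoder_states[:, :d]    # used only to score attention
    values = encoder_states[:, d:]  # used only to encode the context

    scores = keys @ decoder_state                    # dot-product scoring (assumption)
    scores -= scores.max()                           # numerical stability for softmax
    weights = np.exp(scores) / np.exp(scores).sum()  # attention distribution from keys
    context = weights @ values                       # context representation from values
    return context, weights

# Toy usage: 5 source positions, hidden size 4 per half.
enc = np.random.randn(5, 8)
dec = np.random.randn(4)
ctx, att = key_value_attention(enc, dec)
print(ctx.shape, att.sum())  # (4,) 1.0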

