@inproceedings{yoshikawa-shigeto-takeuchi:2017:Short,
  author    = {Yoshikawa, Yuya and Shigeto, Yutaro and Takeuchi, Akikazu},
  title     = {{STAIR} Captions: Constructing a Large-Scale {Japanese} Image Caption Dataset},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  month     = jul,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {417--421},
  doi       = {10.18653/v1/P17-2066},
  url       = {https://aclanthology.org/P17-2066},
  abstract  = {In recent years, automatic generation of image descriptions (captions), that
    is, image captioning, has attracted a great deal of attention.
    In this paper, we particularly consider generating Japanese captions for
    images.
    Since most available caption datasets have been constructed for English
    language, there are few datasets for Japanese.
    To tackle this problem, we construct a large-scale Japanese image caption
    dataset based on images from MS-COCO, which is called STAIR Captions.
    STAIR Captions consists of 820,310 Japanese captions for 164,062 images.
    In the experiment, we show that a neural network trained using STAIR Captions
    can generate more natural and better Japanese captions, compared to those
    generated using English-Japanese machine translation after generating English
    captions.},
}

