@InProceedings{yu-EtAl:2017:VL17,
  author    = {Yu, Yanchao  and  Eshghi, Arash  and  Mills, Gregory  and  Lemon, Oliver},
  title     = {The BURCHAK corpus: a Challenge Data Set for Interactive Learning of Visually Grounded Word Meanings},
  booktitle = {Proceedings of the Sixth Workshop on Vision and Language},
  month     = {April},
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {1--10},
  abstract  = {We motivate and describe a new freely available human-human dialogue data set
	for interactive learning of visually grounded word meanings through ostensive
	definition by a tutor to a learner. The data has been collected using a novel,
	character-by-character variant of the DiET
	chat tool (Healey et al., 2003; anon.) with a novel task, where a Learner needs
	to learn invented visual attribute words (such as “burchak” for square)
	from a tutor. As such, the text-based interactions closely resemble
	face-to-face conversation and thus contain many of the linguistic
	phenomena encountered in natural, spontaneous dialogue. These include self- and
	other-correction, mid-sentence continuations, interruptions, turn overlaps,
	fillers, hedges and many kinds of ellipsis. We also present a generic n-gram
	framework for building user (i.e. tutor) simulations from this type of
	incremental dialogue data, which is freely available to researchers. We show
	that the simulations produce outputs that are similar to the original data
	(e.g. 78% turn match similarity). Finally, we train and evaluate a
	Reinforcement Learning dialogue control agent for learning visually grounded
	word meanings from the BURCHAK corpus. The learned policy shows performance
	comparable to a previously built rule-based system.},
  url       = {http://www.aclweb.org/anthology/W17-2001}
}