% Gardent et al., ACL 2017 long paper on creating data-to-text training corpora
% for NLG micro-planners (dataset released in the context of the WebNLG shared task).
% Notes: month uses the predefined macro so styles can localise/abbreviate it;
% the acronym in the title is braced so sentence-casing styles keep it uppercase.
@InProceedings{gardent-EtAl:2017:Long,
  author    = {Gardent, Claire  and  Shimorina, Anastasia  and  Narayan, Shashi  and  Perez-Beltrachini, Laura},
  title     = {Creating Training Corpora for {NLG} Micro-Planners},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {179--188},
  abstract  = {In this paper, we present a novel framework for semi-automatically
	creating linguistically challenging micro-planning data-to-text
	corpora from existing Knowledge Bases. Because our method pairs data
	of varying size and shape with texts ranging from simple clauses to
	short texts, a dataset created using this framework provides a
	challenging benchmark for microplanning. Another feature of this
	framework is that it can be applied to any large scale knowledge base
	and can therefore be used to train and learn KB verbalisers.  We apply
	our framework to DBpedia data and compare the resulting dataset with
	Wen et al. 2016's. We show that while Wen et al.'s dataset is
	more than twice larger than ours, it is less diverse both in terms of
	input and in terms of text. We thus propose our corpus generation
	framework as a novel method for creating challenging data sets from
	which NLG models can be learned which are capable of handling the
	complex interactions occurring during in micro-planning between
	lexicalisation, aggregation, surface realisation, referring expression
	generation and sentence segmentation. To encourage researchers to take
	up this challenge, we made available a dataset of 21,855 data/text
	pairs created using this framework in the context of the
	WebNLG shared task.},
  doi       = {10.18653/v1/P17-1017},
  url       = {https://aclanthology.org/P17-1017}
}

