% SIGdial 2017 paper presenting a crowdsourcing methodology for eliciting
% lexically diverse queries and the resulting SmartHome semantic parsing dataset.
% The abstract originally cited Wang2015BuildingAS via the paper's own \newcite
% macro; it is spelled out as plain text so the field typesets without the
% ACL style files or the paper's bibliography.
@InProceedings{ravichander-EtAl:2017:W17-55,
  author    = {Ravichander, Abhilasha and Manzini, Thomas and Grabmair, Matthias and Neubig, Graham and Francis, Jonathan and Nyberg, Eric},
  title     = {How Would You Say It? {Eliciting} Lexically Diverse Dialogue for Supervised Semantic Parsing},
  booktitle = {Proceedings of the 18th Annual {SIGdial} Meeting on Discourse and Dialogue},
  month     = aug,
  year      = {2017},
  address   = {Saarbr{\"u}cken, Germany},
  publisher = {Association for Computational Linguistics},
  pages     = {374--383},
  abstract  = {Building dialogue interfaces for real-world scenarios often entails training
               semantic parsers starting from zero examples. How can we build datasets that
               better capture the variety of ways users might phrase their queries, and what
               queries are actually realistic? Wang et al. (2015) proposed a method
               to build semantic parsing datasets by generating canonical utterances using a
               grammar and having crowdworkers paraphrase them into natural wording. A
               limitation of this approach is that it induces bias towards using similar
               language as the canonical utterances. In this work, we present a methodology
               that elicits meaningful and lexically diverse queries from users for semantic
               parsing tasks. Starting from a seed lexicon and a generative grammar, we pair
               logical forms with mixed text-image representations and ask crowdworkers to
               paraphrase and confirm the plausibility of the queries that they generated. We
               use this method to build a semantic parsing dataset from scratch for a dialog
               agent in a smart-home simulation. We find evidence that this dataset, which we
               have named SmartHome, is demonstrably more lexically diverse and difficult to
               parse than existing domain-specific semantic parsing datasets.},
  url       = {http://aclweb.org/anthology/W17-5545}
}