@InProceedings{welbl-liu-gardner:2017:WNUT,
  author    = {Welbl, Johannes  and  Liu, Nelson F.  and  Gardner, Matt},
  title     = {Crowdsourcing Multiple Choice Science Questions},
  booktitle = {Proceedings of the 3rd Workshop on Noisy User-generated Text},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {94--106},
  abstract  = {We present a novel method for obtaining high-quality, domain-targeted multiple
    choice questions from crowd workers. Generating these questions can be
    difficult without trading away originality, relevance or diversity in the
    answer options. Our method addresses these problems by leveraging a large
    corpus of domain-specific
    text and a small set of existing questions. It produces model suggestions for
    document selection and answer distractor choice which aid the human question
    generation process. With this method we have assembled SciQ, a dataset of 13.7K
    multiple choice science exam questions. We demonstrate that the method produces
    in-domain questions by providing an analysis of this new dataset and by showing
    that humans cannot distinguish the crowdsourced questions from original
    questions. When using SciQ as additional training data to existing questions,
    we observe accuracy improvements on real science exams.},
  doi       = {10.18653/v1/W17-4413},
  url       = {http://www.aclweb.org/anthology/W17-4413}
}

