@inproceedings{jiang-kummerfeld-lasecki:2017:Short,
  author    = {Jiang, Youxuan and Kummerfeld, Jonathan K. and Lasecki, Walter S.},
  title     = {Understanding Task Design Trade-offs in Crowdsourced Paraphrase Collection},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  month     = jul,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {103--109},
  doi       = {10.18653/v1/P17-2017},
  url       = {http://aclweb.org/anthology/P17-2017},
  abstract  = {Linguistically diverse datasets are critical for training and evaluating robust
    machine learning systems, but data collection is a costly process that often
    requires experts. Crowdsourcing the process of paraphrase generation is an
    effective means of expanding natural language datasets, but there has been
    limited analysis of the trade-offs that arise when designing tasks. In this
    paper, we present the first systematic study of the key factors in
    crowdsourcing paraphrase collection. We consider variations in instructions,
    incentives, data domains, and workflows. We manually analyzed paraphrases for
    correctness, grammaticality, and linguistic diversity. Our observations provide
    new insight into the trade-offs between accuracy and diversity in crowd
    responses that arise as a result of task design, providing guidance for future
    paraphrase generation procedures.},
}

