NewsQA workshop paper (RepL4NLP 2017, ACL Anthology ID W17-2623).
The address field holds the workshop venue, following the ACL Anthology export convention.
@inproceedings{trischler-EtAl:2017:RepL4NLP,
  author    = {Trischler, Adam and Wang, Tong and Yuan, Xingdi and Harris, Justin and Sordoni, Alessandro and Bachman, Philip and Suleman, Kaheer},
  title     = {{NewsQA}: A Machine Comprehension Dataset},
  booktitle = {Proceedings of the 2nd Workshop on Representation Learning for {NLP}},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {191--200},
  doi       = {10.18653/v1/W17-2623},
  url       = {http://www.aclweb.org/anthology/W17-2623},
  abstract  = {We present NewsQA, a challenging machine comprehension dataset of over 100,000
    human-generated question-answer pairs. Crowdworkers supply questions and
    answers based on a set of over 10,000 news articles from CNN, with answers
    consisting of spans of text in the articles. We collect this dataset through a
    four-stage process designed to solicit exploratory questions that require
    reasoning. Analysis confirms that NewsQA demands abilities beyond simple
    word matching and recognizing textual entailment. We measure human performance
    on the dataset and compare it to several strong neural models. The performance
    gap between humans and machines (13.3\% F1) indicates that significant progress
    can be made on NewsQA through future research. The dataset is freely available
    online.},
}

