% Conference paper from the NAACL-HLT 2018 proceedings, Volume 1 (Long Papers).
% `month` uses the predefined BibTeX macro (unquoted) so styles can localise or
% abbreviate it; the percent sign in `abstract` is escaped so the field stays
% valid LaTeX if a style ever typesets it.
@InProceedings{khashabi-EtAl:2018:N18-1,
  author    = {Khashabi, Daniel and Chaturvedi, Snigdha and Roth, Michael and Upadhyay, Shyam and Roth, Dan},
  title     = {Looking Beyond the Surface: A Challenge Set for Reading Comprehension over Multiple Sentences},
  booktitle = {Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)},
  month     = jun,
  year      = {2018},
  address   = {New Orleans, Louisiana},
  publisher = {Association for Computational Linguistics},
  pages     = {252--262},
  abstract  = {We present a reading comprehension challenge in which questions can only be answered by taking into account information from multiple sentences. We solicit and verify questions and answers for this challenge through a 4-step crowdsourcing experiment. Our challenge dataset contains 6,500+ questions for 1000+ paragraphs across 7 different domains (elementary school science, news, travel guides, fiction stories, etc) bringing in linguistic diversity to the texts and to the questions wordings. On a subset of our dataset, we found human solvers to achieve an F1-score of 88.1\%. We analyze a range of baselines, including a recent state-of-art reading comprehension system, and demonstrate the difficulty of this challenge, despite a high human performance. The dataset is the first to study multi-sentence inference at scale, with an open-ended set of question types that requires reasoning skills.},
  url       = {http://www.aclweb.org/anthology/N18-1023}
}