@inproceedings{sugawara-EtAl:2017:Long,
  author    = {Sugawara, Saku and Kido, Yusuke and Yokono, Hikaru and Aizawa, Akiko},
  title     = {Evaluation Metrics for Machine Reading Comprehension: Prerequisite Skills and Readability},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {806--817},
  abstract  = {Knowing the quality of reading comprehension (RC) datasets is important for the
               development of natural-language understanding systems. In this study, two
               classes of metrics were adopted for evaluating RC datasets: prerequisite
               skills and readability. We applied these classes to six existing datasets,
               including MCTest and SQuAD, and highlighted the characteristics of the
               datasets according to each metric and the correlation between the two
               classes. Our dataset analysis suggests that the readability of RC datasets
               does not directly affect the question difficulty and that it is possible to
               create an RC dataset that is easy to read but difficult to answer.},
  doi       = {10.18653/v1/P17-1075},
  url       = {https://aclanthology.org/P17-1075},
}

