@inproceedings{sugawara-etal-2017-evaluation,
    title = "Evaluation Metrics for Machine Reading Comprehension: Prerequisite Skills and Readability",
    author = "Sugawara, Saku and
      Kido, Yusuke and
      Yokono, Hikaru and
      Aizawa, Akiko",
    editor = "Barzilay, Regina and
      Kan, Min-Yen",
    booktitle = "Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
    month = jul,
    year = "2017",
    address = "Vancouver, Canada",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/P17-1075",
    doi = "10.18653/v1/P17-1075",
    pages = "806--817",
    abstract = "Knowing the quality of reading comprehension (RC) datasets is important for the development of natural-language understanding systems. In this study, two classes of metrics were adopted for evaluating RC datasets: prerequisite skills and readability. We applied these classes to six existing datasets, including MCTest and SQuAD, and highlighted the characteristics of the datasets according to each metric and the correlation between the two classes. Our dataset analysis suggests that the readability of RC datasets does not directly affect the question difficulty and that it is possible to create an RC dataset that is easy to read but difficult to answer.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sugawara-etal-2017-evaluation">
  <titleInfo>
    <title>Evaluation Metrics for Machine Reading Comprehension: Prerequisite Skills and Readability</title>
  </titleInfo>
  <name type="personal">
    <namePart type="given">Saku</namePart>
    <namePart type="family">Sugawara</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Yusuke</namePart>
    <namePart type="family">Kido</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Hikaru</namePart>
    <namePart type="family">Yokono</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <name type="personal">
    <namePart type="given">Akiko</namePart>
    <namePart type="family">Aizawa</namePart>
    <role>
      <roleTerm authority="marcrelator" type="text">author</roleTerm>
    </role>
  </name>
  <originInfo>
    <dateIssued>2017-07</dateIssued>
  </originInfo>
  <typeOfResource>text</typeOfResource>
  <relatedItem type="host">
    <titleInfo>
      <title>Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Regina</namePart>
      <namePart type="family">Barzilay</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Min-Yen</namePart>
      <namePart type="family">Kan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">editor</roleTerm>
      </role>
    </name>
    <originInfo>
      <publisher>Association for Computational Linguistics</publisher>
      <place>
        <placeTerm type="text">Vancouver, Canada</placeTerm>
      </place>
    </originInfo>
    <genre authority="marcgt">conference publication</genre>
  </relatedItem>
  <abstract>Knowing the quality of reading comprehension (RC) datasets is important for the development of natural-language understanding systems. In this study, two classes of metrics were adopted for evaluating RC datasets: prerequisite skills and readability. We applied these classes to six existing datasets, including MCTest and SQuAD, and highlighted the characteristics of the datasets according to each metric and the correlation between the two classes. Our dataset analysis suggests that the readability of RC datasets does not directly affect the question difficulty and that it is possible to create an RC dataset that is easy to read but difficult to answer.</abstract>
  <identifier type="citekey">sugawara-etal-2017-evaluation</identifier>
  <identifier type="doi">10.18653/v1/P17-1075</identifier>
  <location>
    <url>https://aclanthology.org/P17-1075</url>
  </location>
  <part>
    <date>2017-07</date>
    <extent unit="page">
      <start>806</start>
      <end>817</end>
    </extent>
  </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Evaluation Metrics for Machine Reading Comprehension: Prerequisite Skills and Readability
%A Sugawara, Saku
%A Kido, Yusuke
%A Yokono, Hikaru
%A Aizawa, Akiko
%Y Barzilay, Regina
%Y Kan, Min-Yen
%S Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2017
%8 July
%I Association for Computational Linguistics
%C Vancouver, Canada
%F sugawara-etal-2017-evaluation
%X Knowing the quality of reading comprehension (RC) datasets is important for the development of natural-language understanding systems. In this study, two classes of metrics were adopted for evaluating RC datasets: prerequisite skills and readability. We applied these classes to six existing datasets, including MCTest and SQuAD, and highlighted the characteristics of the datasets according to each metric and the correlation between the two classes. Our dataset analysis suggests that the readability of RC datasets does not directly affect the question difficulty and that it is possible to create an RC dataset that is easy to read but difficult to answer.
%R 10.18653/v1/P17-1075
%U https://aclanthology.org/P17-1075
%U https://doi.org/10.18653/v1/P17-1075
%P 806-817
Markdown (Informal)
[Evaluation Metrics for Machine Reading Comprehension: Prerequisite Skills and Readability](https://aclanthology.org/P17-1075) (Sugawara et al., ACL 2017)