@inproceedings{kaushik-lipton-2018-much,
title = "How Much Reading Does Reading Comprehension Require? A Critical Investigation of Popular Benchmarks",
author = "Kaushik, Divyansh and
Lipton, Zachary C.",
editor = "Riloff, Ellen and
Chiang, David and
Hockenmaier, Julia and
Tsujii, Jun{'}ichi",
booktitle = "Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing",
month = oct # "-" # nov,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D18-1546",
doi = "10.18653/v1/D18-1546",
pages = "5010--5015",
abstract = "Many recent papers address reading comprehension, where examples consist of (question, passage, answer) tuples. Presumably, a model must combine information from both questions and passages to predict corresponding answers. However, despite intense interest in the topic, with hundreds of published papers vying for leaderboard dominance, basic questions about the difficulty of many popular benchmarks remain unanswered. In this paper, we establish sensible baselines for the bAbI, SQuAD, CBT, CNN, and Who-did-What datasets, finding that question- and passage-only models often perform surprisingly well. On 14 out of 20 bAbI tasks, passage-only models achieve greater than 50{\%} accuracy, sometimes matching the full model. Interestingly, while CBT provides 20-sentence passages, only the last is needed for accurate prediction. By comparison, SQuAD and CNN appear better-constructed.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kaushik-lipton-2018-much">
<titleInfo>
<title>How Much Reading Does Reading Comprehension Require? A Critical Investigation of Popular Benchmarks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Divyansh</namePart>
<namePart type="family">Kaushik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zachary</namePart>
<namePart type="given">C</namePart>
<namePart type="family">Lipton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-oct-nov</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ellen</namePart>
<namePart type="family">Riloff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Chiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Hockenmaier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jun’ichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Brussels, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Many recent papers address reading comprehension, where examples consist of (question, passage, answer) tuples. Presumably, a model must combine information from both questions and passages to predict corresponding answers. However, despite intense interest in the topic, with hundreds of published papers vying for leaderboard dominance, basic questions about the difficulty of many popular benchmarks remain unanswered. In this paper, we establish sensible baselines for the bAbI, SQuAD, CBT, CNN, and Who-did-What datasets, finding that question- and passage-only models often perform surprisingly well. On 14 out of 20 bAbI tasks, passage-only models achieve greater than 50% accuracy, sometimes matching the full model. Interestingly, while CBT provides 20-sentence passages, only the last is needed for accurate prediction. By comparison, SQuAD and CNN appear better-constructed.</abstract>
<identifier type="citekey">kaushik-lipton-2018-much</identifier>
<identifier type="doi">10.18653/v1/D18-1546</identifier>
<location>
<url>https://aclanthology.org/D18-1546</url>
</location>
<part>
<date>2018-oct-nov</date>
<extent unit="page">
<start>5010</start>
<end>5015</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T How Much Reading Does Reading Comprehension Require? A Critical Investigation of Popular Benchmarks
%A Kaushik, Divyansh
%A Lipton, Zachary C.
%Y Riloff, Ellen
%Y Chiang, David
%Y Hockenmaier, Julia
%Y Tsujii, Jun’ichi
%S Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing
%D 2018
%8 oct-nov
%I Association for Computational Linguistics
%C Brussels, Belgium
%F kaushik-lipton-2018-much
%X Many recent papers address reading comprehension, where examples consist of (question, passage, answer) tuples. Presumably, a model must combine information from both questions and passages to predict corresponding answers. However, despite intense interest in the topic, with hundreds of published papers vying for leaderboard dominance, basic questions about the difficulty of many popular benchmarks remain unanswered. In this paper, we establish sensible baselines for the bAbI, SQuAD, CBT, CNN, and Who-did-What datasets, finding that question- and passage-only models often perform surprisingly well. On 14 out of 20 bAbI tasks, passage-only models achieve greater than 50% accuracy, sometimes matching the full model. Interestingly, while CBT provides 20-sentence passages, only the last is needed for accurate prediction. By comparison, SQuAD and CNN appear better-constructed.
%R 10.18653/v1/D18-1546
%U https://aclanthology.org/D18-1546
%U https://doi.org/10.18653/v1/D18-1546
%P 5010-5015
Markdown (Informal)
[How Much Reading Does Reading Comprehension Require? A Critical Investigation of Popular Benchmarks](https://aclanthology.org/D18-1546) (Kaushik & Lipton, EMNLP 2018)
ACL
Divyansh Kaushik and Zachary C. Lipton. 2018. How Much Reading Does Reading Comprehension Require? A Critical Investigation of Popular Benchmarks. In Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pages 5010–5015, Brussels, Belgium. Association for Computational Linguistics.