@inproceedings{tandon-etal-2019-wiqa,
title = "{WIQA}: A dataset for {``}What if...{''} reasoning over procedural text",
author = "Tandon, Niket and
Dalvi, Bhavana and
Sakaguchi, Keisuke and
Clark, Peter and
Bosselut, Antoine",
editor = "Inui, Kentaro and
Jiang, Jing and
Ng, Vincent and
Wan, Xiaojun",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)",
month = nov,
year = "2019",
address = "Hong Kong, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/D19-1629",
doi = "10.18653/v1/D19-1629",
pages = "6076--6085",
abstract = "We introduce WIQA, the first large-scale dataset of {``}What if...{''} questions over procedural text. WIQA contains a collection of paragraphs, each annotated with multiple influence graphs describing how one change affects another, and a large (40k) collection of {``}What if...?{''} multiple-choice questions derived from these. For example, given a paragraph about beach erosion, would stormy weather hasten or decelerate erosion? WIQA contains three kinds of questions: perturbations to steps mentioned in the paragraph; external (out-of-paragraph) perturbations requiring commonsense knowledge; and irrelevant (no effect) perturbations. We find that state-of-the-art models achieve 73.8{\%} accuracy, well below the human performance of 96.3{\%}. We analyze the challenges, in particular tracking chains of influences, and present the dataset as an open challenge to the community.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tandon-etal-2019-wiqa">
<titleInfo>
<title>WIQA: A dataset for “What if...” reasoning over procedural text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Niket</namePart>
<namePart type="family">Tandon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bhavana</namePart>
<namePart type="family">Dalvi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Keisuke</namePart>
<namePart type="family">Sakaguchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Clark</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antoine</namePart>
<namePart type="family">Bosselut</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kentaro</namePart>
<namePart type="family">Inui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jing</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vincent</namePart>
<namePart type="family">Ng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiaojun</namePart>
<namePart type="family">Wan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Hong Kong, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We introduce WIQA, the first large-scale dataset of “What if...” questions over procedural text. WIQA contains a collection of paragraphs, each annotated with multiple influence graphs describing how one change affects another, and a large (40k) collection of “What if...?” multiple-choice questions derived from these. For example, given a paragraph about beach erosion, would stormy weather hasten or decelerate erosion? WIQA contains three kinds of questions: perturbations to steps mentioned in the paragraph; external (out-of-paragraph) perturbations requiring commonsense knowledge; and irrelevant (no effect) perturbations. We find that state-of-the-art models achieve 73.8% accuracy, well below the human performance of 96.3%. We analyze the challenges, in particular tracking chains of influences, and present the dataset as an open challenge to the community.</abstract>
<identifier type="citekey">tandon-etal-2019-wiqa</identifier>
<identifier type="doi">10.18653/v1/D19-1629</identifier>
<location>
<url>https://aclanthology.org/D19-1629</url>
</location>
<part>
<date>2019-11</date>
<extent unit="page">
<start>6076</start>
<end>6085</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T WIQA: A dataset for “What if...” reasoning over procedural text
%A Tandon, Niket
%A Dalvi, Bhavana
%A Sakaguchi, Keisuke
%A Clark, Peter
%A Bosselut, Antoine
%Y Inui, Kentaro
%Y Jiang, Jing
%Y Ng, Vincent
%Y Wan, Xiaojun
%S Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)
%D 2019
%8 November
%I Association for Computational Linguistics
%C Hong Kong, China
%F tandon-etal-2019-wiqa
%X We introduce WIQA, the first large-scale dataset of “What if...” questions over procedural text. WIQA contains a collection of paragraphs, each annotated with multiple influence graphs describing how one change affects another, and a large (40k) collection of “What if...?” multiple-choice questions derived from these. For example, given a paragraph about beach erosion, would stormy weather hasten or decelerate erosion? WIQA contains three kinds of questions: perturbations to steps mentioned in the paragraph; external (out-of-paragraph) perturbations requiring commonsense knowledge; and irrelevant (no effect) perturbations. We find that state-of-the-art models achieve 73.8% accuracy, well below the human performance of 96.3%. We analyze the challenges, in particular tracking chains of influences, and present the dataset as an open challenge to the community.
%R 10.18653/v1/D19-1629
%U https://aclanthology.org/D19-1629
%U https://doi.org/10.18653/v1/D19-1629
%P 6076-6085
Markdown (Informal)
[WIQA: A dataset for “What if...” reasoning over procedural text](https://aclanthology.org/D19-1629) (Tandon et al., EMNLP-IJCNLP 2019)
ACL
- Niket Tandon, Bhavana Dalvi, Keisuke Sakaguchi, Peter Clark, and Antoine Bosselut. 2019. WIQA: A dataset for “What if...” reasoning over procedural text. In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), pages 6076–6085, Hong Kong, China. Association for Computational Linguistics.