@inproceedings{parrish-etal-2022-single,
    title = "Single-Turn Debate Does Not Help Humans Answer Hard Reading-Comprehension Questions",
    author = "Parrish, Alicia and
      Trivedi, Harsh and
      Perez, Ethan and
      Chen, Angelica and
      Nangia, Nikita and
      Phang, Jason and
      Bowman, Samuel",
    editor = "Andreas, Jacob and
      Narasimhan, Karthik and
      Nematzadeh, Aida",
    booktitle = "Proceedings of the First Workshop on Learning with Natural Language Supervision",
    month = may,
    year = "2022",
    address = "Dublin, Ireland",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2022.lnls-1.3",
    doi = "10.18653/v1/2022.lnls-1.3",
    pages = "17--28",
    abstract = "Current QA systems can generate reasonable-sounding yet false answers without explanation or evidence for the generated answer, which is especially problematic when humans cannot readily check the model{'}s answers. This presents a challenge for building trust in machine learning systems. We take inspiration from real-world situations where difficult questions are answered by considering opposing sides (see Irving et al., 2018). For multiple-choice QA examples, we build a dataset of single arguments for both a correct and incorrect answer option in a debate-style set-up as an initial step in training models to produce explanations for two candidate answers. We use long contexts{---}humans familiar with the context write convincing explanations for pre-selected correct and incorrect answers, and we test if those explanations allow humans who have not read the full context to more accurately determine the correct answer. We do not find that explanations in our set-up improve human accuracy, but a baseline condition shows that providing human-selected text snippets does improve accuracy. We use these findings to suggest ways of improving the debate set up for future data collection efforts.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="parrish-etal-2022-single">
    <titleInfo>
      <title>Single-Turn Debate Does Not Help Humans Answer Hard Reading-Comprehension Questions</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Alicia</namePart>
      <namePart type="family">Parrish</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Harsh</namePart>
      <namePart type="family">Trivedi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ethan</namePart>
      <namePart type="family">Perez</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Angelica</namePart>
      <namePart type="family">Chen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Nikita</namePart>
      <namePart type="family">Nangia</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jason</namePart>
      <namePart type="family">Phang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Samuel</namePart>
      <namePart type="family">Bowman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the First Workshop on Learning with Natural Language Supervision</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Jacob</namePart>
        <namePart type="family">Andreas</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Karthik</namePart>
        <namePart type="family">Narasimhan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Aida</namePart>
        <namePart type="family">Nematzadeh</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Dublin, Ireland</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Current QA systems can generate reasonable-sounding yet false answers without explanation or evidence for the generated answer, which is especially problematic when humans cannot readily check the model’s answers. This presents a challenge for building trust in machine learning systems. We take inspiration from real-world situations where difficult questions are answered by considering opposing sides (see Irving et al., 2018). For multiple-choice QA examples, we build a dataset of single arguments for both a correct and incorrect answer option in a debate-style set-up as an initial step in training models to produce explanations for two candidate answers. We use long contexts—humans familiar with the context write convincing explanations for pre-selected correct and incorrect answers, and we test if those explanations allow humans who have not read the full context to more accurately determine the correct answer. We do not find that explanations in our set-up improve human accuracy, but a baseline condition shows that providing human-selected text snippets does improve accuracy. We use these findings to suggest ways of improving the debate set up for future data collection efforts.</abstract>
    <identifier type="citekey">parrish-etal-2022-single</identifier>
    <identifier type="doi">10.18653/v1/2022.lnls-1.3</identifier>
    <location>
      <url>https://aclanthology.org/2022.lnls-1.3</url>
    </location>
    <part>
      <date>2022-05</date>
      <extent unit="page">
        <start>17</start>
        <end>28</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Single-Turn Debate Does Not Help Humans Answer Hard Reading-Comprehension Questions
%A Parrish, Alicia
%A Trivedi, Harsh
%A Perez, Ethan
%A Chen, Angelica
%A Nangia, Nikita
%A Phang, Jason
%A Bowman, Samuel
%Y Andreas, Jacob
%Y Narasimhan, Karthik
%Y Nematzadeh, Aida
%S Proceedings of the First Workshop on Learning with Natural Language Supervision
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F parrish-etal-2022-single
%X Current QA systems can generate reasonable-sounding yet false answers without explanation or evidence for the generated answer, which is especially problematic when humans cannot readily check the model’s answers. This presents a challenge for building trust in machine learning systems. We take inspiration from real-world situations where difficult questions are answered by considering opposing sides (see Irving et al., 2018). For multiple-choice QA examples, we build a dataset of single arguments for both a correct and incorrect answer option in a debate-style set-up as an initial step in training models to produce explanations for two candidate answers. We use long contexts—humans familiar with the context write convincing explanations for pre-selected correct and incorrect answers, and we test if those explanations allow humans who have not read the full context to more accurately determine the correct answer. We do not find that explanations in our set-up improve human accuracy, but a baseline condition shows that providing human-selected text snippets does improve accuracy. We use these findings to suggest ways of improving the debate set up for future data collection efforts.
%R 10.18653/v1/2022.lnls-1.3
%U https://aclanthology.org/2022.lnls-1.3
%U https://doi.org/10.18653/v1/2022.lnls-1.3
%P 17-28
Markdown (Informal)
[Single-Turn Debate Does Not Help Humans Answer Hard Reading-Comprehension Questions](https://aclanthology.org/2022.lnls-1.3) (Parrish et al., LNLS 2022)
ACL
Alicia Parrish, Harsh Trivedi, Ethan Perez, Angelica Chen, Nikita Nangia, Jason Phang, and Samuel Bowman. 2022. Single-Turn Debate Does Not Help Humans Answer Hard Reading-Comprehension Questions. In Proceedings of the First Workshop on Learning with Natural Language Supervision, pages 17–28, Dublin, Ireland. Association for Computational Linguistics.