@inproceedings{wallace-boyd-graber-2018-trick,
title = "Trick Me If You Can: Adversarial Writing of Trivia Challenge Questions",
author = "Wallace, Eric and
Boyd-Graber, Jordan",
editor = "Shwartz, Vered and
Tabassum, Jeniya and
Voigt, Rob and
Che, Wanxiang and
de Marneffe, Marie-Catherine and
Nissim, Malvina",
booktitle = "Proceedings of {ACL} 2018, Student Research Workshop",
month = jul,
year = "2018",
address = "Melbourne, Australia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/P18-3018",
doi = "10.18653/v1/P18-3018",
pages = "127--133",
abstract = "Modern question answering systems have been touted as approaching human performance. However, existing question answering datasets are imperfect tests. Questions are written with humans in mind, not computers, and often do not properly expose model limitations. To address this, we develop an adversarial writing setting, where humans interact with trained models and try to break them. This annotation process yields a challenge set, which despite being easy for trivia players to answer, systematically stumps automated question answering systems. Diagnosing model errors on the evaluation data provides actionable insights to explore in developing robust and generalizable question answering systems.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wallace-boyd-graber-2018-trick">
<titleInfo>
<title>Trick Me If You Can: Adversarial Writing of Trivia Challenge Questions</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eric</namePart>
<namePart type="family">Wallace</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of ACL 2018, Student Research Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vered</namePart>
<namePart type="family">Shwartz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jeniya</namePart>
<namePart type="family">Tabassum</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rob</namePart>
<namePart type="family">Voigt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Catherine</namePart>
<namePart type="family">de Marneffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malvina</namePart>
<namePart type="family">Nissim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Melbourne, Australia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Modern question answering systems have been touted as approaching human performance. However, existing question answering datasets are imperfect tests. Questions are written with humans in mind, not computers, and often do not properly expose model limitations. To address this, we develop an adversarial writing setting, where humans interact with trained models and try to break them. This annotation process yields a challenge set, which despite being easy for trivia players to answer, systematically stumps automated question answering systems. Diagnosing model errors on the evaluation data provides actionable insights to explore in developing robust and generalizable question answering systems.</abstract>
<identifier type="citekey">wallace-boyd-graber-2018-trick</identifier>
<identifier type="doi">10.18653/v1/P18-3018</identifier>
<location>
<url>https://aclanthology.org/P18-3018</url>
</location>
<part>
<date>2018-07</date>
<extent unit="page">
<start>127</start>
<end>133</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Trick Me If You Can: Adversarial Writing of Trivia Challenge Questions
%A Wallace, Eric
%A Boyd-Graber, Jordan
%Y Shwartz, Vered
%Y Tabassum, Jeniya
%Y Voigt, Rob
%Y Che, Wanxiang
%Y de Marneffe, Marie-Catherine
%Y Nissim, Malvina
%S Proceedings of ACL 2018, Student Research Workshop
%D 2018
%8 July
%I Association for Computational Linguistics
%C Melbourne, Australia
%F wallace-boyd-graber-2018-trick
%X Modern question answering systems have been touted as approaching human performance. However, existing question answering datasets are imperfect tests. Questions are written with humans in mind, not computers, and often do not properly expose model limitations. To address this, we develop an adversarial writing setting, where humans interact with trained models and try to break them. This annotation process yields a challenge set, which despite being easy for trivia players to answer, systematically stumps automated question answering systems. Diagnosing model errors on the evaluation data provides actionable insights to explore in developing robust and generalizable question answering systems.
%R 10.18653/v1/P18-3018
%U https://aclanthology.org/P18-3018
%U https://doi.org/10.18653/v1/P18-3018
%P 127-133
Markdown (Informal)
[Trick Me If You Can: Adversarial Writing of Trivia Challenge Questions](https://aclanthology.org/P18-3018) (Wallace & Boyd-Graber, ACL 2018)
ACL