@article{crane-2018-questionable,
title = "Questionable Answers in Question Answering Research: Reproducibility and Variability of Published Results",
author = "Crane, Matt",
editor = "Lee, Lillian and
Johnson, Mark and
Toutanova, Kristina and
Roark, Brian",
journal = "Transactions of the Association for Computational Linguistics",
volume = "6",
year = "2018",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/Q18-1018",
doi = "10.1162/tacl_a_00018",
pages = "241--252",
abstract = "{``}Based on theoretical reasoning it has been suggested that the reliability of findings published in the scientific literature decreases with the popularity of a research field{''} (Pfeiffer and Hoffmann, 2009). As we know, deep learning is very popular and the ability to reproduce results is an important part of science. There is growing concern within the deep learning community about the reproducibility of results that are presented. In this paper we present a number of controllable, yet unreported, effects that can substantially change the effectiveness of a sample model, and thusly the reproducibility of those results. Through these environmental effects we show that the commonly held belief that distribution of source code is all that is needed for reproducibility is not enough. Source code without a reproducible environment does not mean anything at all. In addition the range of results produced from these effects can be larger than the majority of incremental improvement reported.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="crane-2018-questionable">
<titleInfo>
<title>Questionable Answers in Question Answering Research: Reproducibility and Variability of Published Results</title>
</titleInfo>
<name type="personal">
<namePart type="given">Matt</namePart>
<namePart type="family">Crane</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>“Based on theoretical reasoning it has been suggested that the reliability of findings published in the scientific literature decreases with the popularity of a research field” (Pfeiffer and Hoffmann, 2009). As we know, deep learning is very popular and the ability to reproduce results is an important part of science. There is growing concern within the deep learning community about the reproducibility of results that are presented. In this paper we present a number of controllable, yet unreported, effects that can substantially change the effectiveness of a sample model, and thusly the reproducibility of those results. Through these environmental effects we show that the commonly held belief that distribution of source code is all that is needed for reproducibility is not enough. Source code without a reproducible environment does not mean anything at all. In addition the range of results produced from these effects can be larger than the majority of incremental improvement reported.</abstract>
<identifier type="citekey">crane-2018-questionable</identifier>
<identifier type="doi">10.1162/tacl_a_00018</identifier>
<location>
<url>https://aclanthology.org/Q18-1018</url>
</location>
<part>
<date>2018</date>
<detail type="volume"><number>6</number></detail>
<extent unit="page">
<start>241</start>
<end>252</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Questionable Answers in Question Answering Research: Reproducibility and Variability of Published Results
%A Crane, Matt
%J Transactions of the Association for Computational Linguistics
%D 2018
%V 6
%I MIT Press
%C Cambridge, MA
%F crane-2018-questionable
%X “Based on theoretical reasoning it has been suggested that the reliability of findings published in the scientific literature decreases with the popularity of a research field” (Pfeiffer and Hoffmann, 2009). As we know, deep learning is very popular and the ability to reproduce results is an important part of science. There is growing concern within the deep learning community about the reproducibility of results that are presented. In this paper we present a number of controllable, yet unreported, effects that can substantially change the effectiveness of a sample model, and thusly the reproducibility of those results. Through these environmental effects we show that the commonly held belief that distribution of source code is all that is needed for reproducibility is not enough. Source code without a reproducible environment does not mean anything at all. In addition the range of results produced from these effects can be larger than the majority of incremental improvement reported.
%R 10.1162/tacl_a_00018
%U https://aclanthology.org/Q18-1018
%U https://doi.org/10.1162/tacl_a_00018
%P 241-252
Markdown (Informal)
[Questionable Answers in Question Answering Research: Reproducibility and Variability of Published Results](https://aclanthology.org/Q18-1018) (Crane, TACL 2018)
ACL