% Conference paper in the Proceedings of the 27th International Conference on
% Computational Linguistics (2018), pages 1859--1874.
% The inner braces in the title keep the proper name "Fake News Challenge"
% capitalised under styles that convert titles to sentence case.
@InProceedings{hanselowski-EtAl:2018:C18-1,
  author    = {Hanselowski, Andreas and PVS, Avinesh and Schiller, Benjamin and Caspelherr, Felix and Chaudhuri, Debanjan and Meyer, Christian M. and Gurevych, Iryna},
  title     = {A Retrospective Analysis of the {Fake News Challenge} Stance-Detection Task},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1859--1874},
  abstract  = {The 2017 Fake News Challenge Stage 1 (FNC-1) shared task addressed a stance classification task as a crucial first step towards detecting fake news. To date, there is no in-depth analysis paper to critically discuss FNC-1's experimental setup, reproduce the results, and draw conclusions for next-generation stance classification methods. In this paper, we provide such an in-depth analysis for the three top-performing systems. We first find that FNC-1's proposed evaluation metric favors the majority class, which can be easily classified, and thus overestimates the true discriminative power of the methods. Therefore, we propose a new F1-based metric yielding a changed system ranking. Next, we compare the features and architectures used, which leads to a novel feature-rich stacked LSTM model that performs on par with the best systems, but is superior in predicting minority classes. To understand the methods' ability to generalize, we derive a new dataset and perform both in-domain and cross-domain experiments. Our qualitative and quantitative study helps interpreting the original FNC-1 scores and understand which features help improving performance and why. Our new dataset and all source code used during the reproduction study are publicly available for future research.},
  url       = {https://aclanthology.org/C18-1158/}
}

