@InProceedings{moss-leslie-rayson:2018:C18-1,
  author    = {Moss, Henry and Leslie, David and Rayson, Paul},
  title     = {Using {J-K}-fold Cross Validation To Reduce Variance When Tuning {NLP} Models},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {2978--2989},
  abstract  = {K-fold cross validation (CV) is a popular method for estimating the true performance of machine learning models, allowing model selection and parameter tuning. However, the very process of CV requires random partitioning of the data and so our performance estimates are in fact stochastic, with variability that can be substantial for natural language processing tasks. We demonstrate that these unstable estimates cannot be relied upon for effective parameter tuning. The resulting tuned parameters are highly sensitive to how our data is partitioned, meaning that we often select sub-optimal parameter choices and have serious reproducibility issues.},
  url       = {http://www.aclweb.org/anthology/C18-1252}
}

