% Shi and Demberg, EACL 2017 short paper (pages 150--156 of Volume 2).
% The month is given as the standard unquoted macro so that the bibliography
% style decides how it is abbreviated or localized. The abstract field is
% informational only; standard styles do not print it.
@InProceedings{shi-demberg:2017:EACLshort,
  author    = {Shi, Wei and Demberg, Vera},
  title     = {Do We Need Cross Validation for Discourse Relation Classification?},
  booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {150--156},
  abstract  = {The task of implicit discourse relation classification has received increased
               attention in recent years, including two CoNLL shared tasks on the topic.
               Existing machine learning models for the task train on sections 2-21 of the
               PDTB and test on section 23, which includes a total of 761 implicit discourse
               relations. In this paper, we'd like to make a methodological point, arguing
               that the standard test set is too small to draw conclusions about whether the
               inclusion of certain features constitutes a genuine improvement, or whether one
               got lucky with some properties of the test set, and argue for the adoption of
               cross validation for the discourse relation classification task by the
               community.},
  url       = {http://www.aclweb.org/anthology/E17-2024}
}