% Schluter (2017), EACL 2017 short paper (Volume 2), pages 41--45.
% The month is given as the standard macro (apr) so styles can format it;
% the acronym ROUGE is braced so sentence-casing styles keep its capitals.
@InProceedings{schluter:2017:EACLshort,
  author    = {Schluter, Natalie},
  title     = {The limits of automatic summarisation according to {ROUGE}},
  booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {41--45},
  abstract  = {This paper discusses some central caveats of summarisation, incurred in the use
               of the ROUGE metric for evaluation, with respect to optimal solutions. The task
               is NP-hard, of which we give the first proof. Still, as we show
               empirically for three central benchmark datasets for the task, greedy
               algorithms empirically seem to perform optimally according to the metric.
               Additionally, overall quality assurance is problematic: there is no natural
               upper bound on the quality of summarisation systems, and even humans are
               excluded from performing optimal summarisation.},
  url       = {http://www.aclweb.org/anthology/E17-2007}
}

