@inproceedings{peyrard-botschen-gurevych:2017:FrontiersSummarization,
  author    = {Peyrard, Maxime and Botschen, Teresa and Gurevych, Iryna},
  title     = {Learning to Score System Summaries for Better Content Selection Evaluation},
  booktitle = {Proceedings of the Workshop on New Frontiers in Summarization},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {74--84},
  abstract  = {The evaluation of summaries is a challenging but crucial task of the
	summarization field. In this work, we propose to learn an automatic scoring
	metric based on the human judgements available as part of classical
	summarization datasets like TAC-2008 and TAC-2009. Any existing automatic
	scoring metrics can be included as features, the model learns the combination
	exhibiting the best correlation with human judgments. The reliability of the
	new metric is tested in a further manual evaluation where we ask humans to
	evaluate summaries covering the whole scoring spectrum of the metric. We
	release the trained metric as an open-source tool.},
  doi       = {10.18653/v1/W17-4510},
  url       = {http://www.aclweb.org/anthology/W17-4510},
}

