% COLING 2016 conference paper presenting the hMDS multi-document summarization corpus.
@InProceedings{zopf-peyrard-ecklekohler:2016:COLING,
  author    = {Zopf, Markus and Peyrard, Maxime and Eckle-Kohler, Judith},
  title     = {The Next Step for Multi-Document Summarization: A Heterogeneous Multi-Genre Corpus Built with a Novel Construction Approach},
  booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The {COLING} 2016 Organizing Committee},
  pages     = {1535--1545},
  abstract  = {Research in multi-document summarization has focused on newswire corpora since
               the early beginnings. However, the newswire genre provides genre-specific
               features such as sentence position which are easy to exploit in summarization
               systems. Such easy to exploit genre-specific features are available in other
               genres as well. We therefore present the new hMDS corpus for multi-document
               summarization, which contains heterogeneous source documents from multiple text
               genres, as well as summaries with different lengths. For the construction of
               the corpus, we developed a novel construction approach which is suited to build
               large and heterogeneous summarization corpora with little effort. The method
               reverses the usual process of writing summaries for given source documents: it
               combines already available summaries with appropriate source documents. In a
               detailed analysis, we show that our new corpus is significantly different from
               the homogeneous corpora commonly used, and that it is heterogeneous along
               several dimensions. Our experimental evaluation using well-known
               state-of-the-art summarization systems shows that our corpus poses new
               challenges in the field of multi-document summarization. Last but not least, we
               make our corpus publicly available to the research community at the corpus web
               page https://github.com/AIPHES/hMDS.},
  url       = {http://aclweb.org/anthology/C16-1145},
}

