% Workshop paper presenting the Webis-TLDR-17 dataset (Reddit "TL;DR" summaries).
% Conventions used below: the accented letter is written as a BibTeX special
% character ({\"o}) so sorting and label generation treat it as one letter,
% the month uses the predefined macro, and case-sensitive words in the title
% are brace-protected against sentence-casing styles.
@InProceedings{volske-EtAl:2017:FrontiersSummarization,
  author    = {V{\"o}lske, Michael and Potthast, Martin and Syed, Shahbaz and Stein, Benno},
  title     = {{TL;DR}: Mining {Reddit} to Learn Automatic Summarization},
  booktitle = {Proceedings of the Workshop on New Frontiers in Summarization},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {59--63},
  abstract  = {Recent advances in automatic text summarization have used deep neural networks
               to generate high-quality abstractive summaries, but the performance of these
               models strongly depends on large amounts of suitable training data. We propose
               a new method for mining social media for author-provided summaries, taking
               advantage of the common practice of appending a ``TL;DR'' to long posts. A case
               study using a large Reddit crawl yields the Webis-TLDR-17 dataset,
               complementing existing corpora primarily from the news genre. Our technique is
               likely applicable to other social media sites and general web crawls.},
  url       = {http://www.aclweb.org/anthology/W17-4508}
}