% COLING 2016 conference paper: Reddit temporal n-gram corpus, evaluated with Topic-based LSA (TLSA).
@InProceedings{dang-EtAl:2016:COLING,
  author    = {Dang, Anh and Moh'd, Abidalrahman and Islam, Aminul and Minghim, Rosane and Smit, Michael and Milios, Evangelos},
  title     = {{Reddit} Temporal N-gram Corpus and its Applications on Paraphrase and Semantic Similarity in Social Media using a Topic-based Latent Semantic Analysis},
  booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {3553--3564},
  abstract  = {This paper introduces a new large-scale n-gram corpus that is created
               specifically from social media text. Two distinguishing characteristics of this
               corpus are its monthly temporal attribute and that it is created from 1.65
               billion comments of user-generated text in Reddit. The usefulness of this
               corpus is exemplified and evaluated by a novel Topic-based Latent Semantic
               Analysis (TLSA) algorithm. The experimental results show that unsupervised TLSA
               outperforms all the state-of-the-art unsupervised and semi-supervised methods
               in SEMEVAL 2015: paraphrase and semantic similarity in Twitter tasks.},
  url       = {http://aclweb.org/anthology/C16-1335},
}

