% COLING 2016 technical paper; ACL Anthology ID C16-1271 (see url field).
@InProceedings{dey-shrivastava-kaushik:2016:COLING,
  author    = {Dey, Kuntal and Shrivastava, Ritvik and Kaushik, Saroj},
  title     = {A Paraphrase and Semantic Similarity Detection System for User Generated Short-Text Content on Microblogs},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {2880--2890},
  abstract  = {Existing systems deliver high accuracy and F1-scores for detecting paraphrase
               and semantic similarity on traditional clean-text corpus. For instance, on the
               clean-text Microsoft Paraphrase benchmark database, the existing systems attain
               an accuracy as high as 0.8596. However, existing systems for detecting
               paraphrases and semantic similarity on user-generated short-text content on
               microblogs such as Twitter, comprising of noisy and ad hoc short-text, needs
               significant research attention. In this paper, we propose a machine learning
               based approach towards this. We propose a set of features that, although
               well-known in the NLP literature for solving other problems, have not been
               explored for detecting paraphrase or semantic similarity, on noisy
               user-generated short-text data such as Twitter. We apply support vector machine
               (SVM) based learning. We use the benchmark Twitter paraphrase data, released as
               a part of SemEval 2015, for experiments. Our system delivers a paraphrase
               detection F1-score of 0.717 and semantic similarity detection F1-score of
               0.741, thereby significantly outperforming the existing systems, that deliver
               F1-scores of 0.696 and 0.724 for the two problems respectively. Our features
               also allow us to obtain a rank among the top-10, when trained on the Microsoft
               Paraphrase corpus and tested on the corresponding test data, thereby
               empirically establishing our approach as ubiquitous across the different
               paraphrase detection databases.},
  url       = {http://aclweb.org/anthology/C16-1271}
}

