% Yao, Dai, Huang and Caverlee, RANLP 2017 conference paper.
% The DOI is stored bare in `doi`; the resolver link is kept in `url` for styles without DOI support.
@InProceedings{yao-EtAl:2017:RANLP,
  author    = {Yao, Wenlin  and  Dai, Zeyu  and  Huang, Ruihong  and  Caverlee, James},
  title     = {Online Deception Detection Refueled by Real World Data Collection},
  booktitle = {Proceedings of the International Conference Recent Advances in Natural Language Processing, {RANLP} 2017},
  month     = sep,
  year      = {2017},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  pages     = {793--802},
  abstract  = {The lack of large realistic datasets presents a bottleneck in online deception
	detection studies. In this paper, we apply a data collection method based on
	social network analysis to quickly identify high quality deceptive and truthful
	online reviews from Amazon. The dataset contains more than 10,000 deceptive
	reviews and is diverse in product domains and reviewers. Using this dataset, we
	explore effective general features for online deception detection that perform
	well across domains. We demonstrate that with generalized features --
	advertising speak and writing complexity scores -- deception detection
	performance can be further improved by adding additional deceptive reviews from
	assorted domains in training. Finally, reviewer level evaluation gives an
	interesting insight into different deceptive reviewers' writing styles.},
  doi       = {10.26615/978-954-452-049-6_102},
  url       = {https://doi.org/10.26615/978-954-452-049-6_102}
}

