% Ding, Bickel and Pan -- EMNLP 2017 conference paper (pages 2275--2284).
% Notes for maintainers:
%   * Percent signs in the abstract are escaped (\%) so that a style which
%     prints the abstract does not turn the rest of the line into a comment.
%   * month uses the predefined macro `sep' so each style can format it.
%   * NOTE(review): `address' appears to hold the conference location rather
%     than the publisher's city; kept unchanged to preserve rendered output.
@inproceedings{ding-bickel-pan:2017:EMNLP2017,
  author    = {Ding, Tao and Bickel, Warren K. and Pan, Shimei},
  title     = {Multi-View Unsupervised User Feature Embedding for Social Media-based Substance Use Prediction},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {2275--2284},
  abstract  = {In this paper, we demonstrate how the state-of-the-art machine learning and
    text mining techniques can be used to build effective social media-based
    substance use detection systems. Since a substance use ground truth is
    difficult to obtain on a large scale, to maximize system performance, we
    explore different unsupervised feature learning methods to take advantage of a
    large amount of unsupervised social media data. We also demonstrate the benefit
    of using multi-view unsupervised feature learning to combine heterogeneous user
    information such as Facebook ``likes'' and ``status updates'' to enhance system
    performance. Based on our evaluation, our best models achieved 86\% AUC for
    predicting tobacco use, 81\% for alcohol use and 84\% for illicit drug use, all
    of which significantly outperformed existing methods. Our investigation has
    also uncovered interesting relations between a user's social media behavior
    (e.g., word usage) and substance use.},
  url       = {https://www.aclweb.org/anthology/D17-1241},
}

