% Conference-paper entry. Notes for maintainers:
%   - month uses the predefined aug macro (bare, unbraced) so the style controls its rendering;
%   - the brand name in the title is brace-protected against sentence-casing styles;
%   - the t-cedilla in the first author's surname is written as a LaTeX accent group
%     so that 8-bit BibTeX sorts and labels the name correctly.
@InProceedings{preoiucpietro-ungar:2018:C18-1,
  author    = {Preo{\c{t}}iuc-Pietro, Daniel and Ungar, Lyle},
  title     = {User-Level Race and Ethnicity Predictors from {Twitter} Text},
  booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico, USA},
  publisher = {Association for Computational Linguistics},
  pages     = {1534--1545},
  abstract  = {User demographic inference from social media text has the potential to improve a range of downstream applications, including real-time passive polling or quantifying demographic bias. This study focuses on developing models for user-level race and ethnicity prediction. We introduce a data set of users who self-report their race/ethnicity through a survey, in contrast to previous approaches that use distantly supervised data or perceived labels. We develop predictive models from text which accurately predict the membership of a user to the four largest racial and ethnic groups with up to .884 AUC and make these available to the research community.},
  url       = {http://www.aclweb.org/anthology/C18-1130}
}

