% Rieman, Jaidka, Schwartz and Ungar (2017): IJCNLP 2017 long paper on
% Target Side Domain Adaptation (TSDA).
% Proper nouns in the title are brace-protected so sentence-casing styles keep
% their capitals; the month uses the predefined three-letter macro.
% NOTE(review): the abstract is kept verbatim, including the wording
% "potentially inavailability" -- confirm against the published abstract
% before correcting it.
@InProceedings{rieman-EtAl:2017:I17-1,
  author    = {Rieman, Daniel and Jaidka, Kokil and Schwartz, H. Andrew and Ungar, Lyle},
  title     = {Domain Adaptation from User-level {Facebook} Models to County-level {Twitter} Predictions},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  month     = nov,
  year      = {2017},
  address   = {Taipei, Taiwan},
  publisher = {Asian Federation of Natural Language Processing},
  pages     = {764--773},
  abstract  = {Several studies have demonstrated how language models of user attributes, such
	as personality, can be built by using the Facebook language of social media
	users in conjunction with their responses to psychology questionnaires. It is
	challenging to apply these models to make general predictions about attributes
	of communities, such as personality distributions across US counties, because
	it requires 1. the potentially inavailability of the original training data
	because of privacy and ethical regulations, 2. adapting Facebook language
	models to Twitter language without retraining the model, and 3. adapting from
	users to county-level collections of tweets. We propose a two-step algorithm,
	Target Side Domain Adaptation (TSDA) for such domain adaptation when no labeled
	Twitter/county data is available. TSDA corrects for the different word
	distributions between Facebook and Twitter and for the varying word
	distributions across counties by adjusting target side word frequencies; no
	changes to the trained model are made. In the case of predicting the Big Five
	county-level personality traits, TSDA outperforms a state-of-the-art domain
	adaptation method, gives county-level predictions that have fewer extreme
	outliers, higher year-to-year stability, and higher correlation with
	county-level outcomes.},
  url       = {http://www.aclweb.org/anthology/I17-1077}
}