% Workshop paper, Noisy User-generated Text 2017 (Anthology id W17-4407, per the url field).
% NOTE(review): `address` looks like the workshop venue rather than the publisher's
% city; this is kept as exported -- confirm it suits the target bibliography style.
@InProceedings{emmery-chrupala-daelemans:2017:WNUT,
  author    = {Emmery, Chris and Chrupa{\l}a, Grzegorz and Daelemans, Walter},
  title     = {Simple Queries as Distant Labels for Predicting Gender on {Twitter}},
  booktitle = {Proceedings of the 3rd Workshop on Noisy User-generated Text},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {50--55},
  abstract  = {The majority of research on extracting missing user attributes from social
               media profiles use costly hand-annotated labels for supervised learning.
               Distantly supervised methods exist, although these generally rely on knowledge
               gathered using external sources. This paper demonstrates the effectiveness of
               gathering distant labels for self-reported gender on Twitter using simple
               queries. We confirm the reliability of this query heuristic by comparing with
               manual annotation. Moreover, using these labels for distant supervision, we
               demonstrate competitive model performance on the same data as models trained on
               manual annotations. As such, we offer a cheap, extensible, and fast alternative
               that can be employed beyond the task of gender classification.},
  url       = {http://www.aclweb.org/anthology/W17-4407},
}

