% Salehi & Soegaard, WNUT 2017 workshop paper (ACL Anthology W17-4409).
@InProceedings{salehi-sogaard:2017:WNUT,
  author    = {Salehi, Bahar and S{\o}gaard, Anders},
  title     = {Evaluating hypotheses in geolocation on a very large sample of {Twitter}},
  booktitle = {Proceedings of the 3rd Workshop on Noisy User-generated Text},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {62--67},
  abstract  = {Recent work in geolocation has made several hypotheses about what
               linguistic markers are relevant to detect where people write from.
               In this paper, we examine six hypotheses against a corpus consisting
               of all geo-tagged tweets from the US, or whose geo-tags could be
               inferred, in a 19\% sample of Twitter history. Our experiments lend
               support to all six hypotheses, including that spelling variants and
               hashtags are strong predictors of location. We also study what kinds
               of common nouns are predictive of location after controlling for
               named entities such as dolphins or sharks.},
  url       = {http://www.aclweb.org/anthology/W17-4409}
}

