% LREC 2016 conference paper (ACL Anthology ID L16-1479) on how Twitter's 1% sample feed affects trend detection.
@inproceedings{L16-1479,
 abstract  = {Much research has focused on detecting trends on Twitter, including health-related trends such as mentions of Influenza-like illnesses or their symptoms. The majority of this research has been conducted using Twitter's public feed, which includes only about 1\% of all public tweets. It is unclear if, when, and how using Twitter's 1\% feed has affected the evaluation of trend detection methods. In this work we use a larger feed to investigate the effects of sampling on Twitter trend detection. We focus on using health-related trends to estimate the prevalence of Influenza-like illnesses based on tweets. We use ground truth obtained from the CDC and Google Flu Trends to explore how the prevalence estimates degrade when moving from a 100\% to a 1\% sample. We find that using the 1\% sample is unlikely to substantially harm ILI estimates made at the national level, but can cause poor performance when estimates are made at the city level.},
 address   = {Portoro{\v{z}}, Slovenia},
 author    = {Yates, Andrew and Kolcz, Alek and Goharian, Nazli and Frieder, Ophir},
 booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC} 2016)},
 month     = may,
 pages     = {2998--3005},
 publisher = {European Language Resources Association (ELRA)},
 title     = {Effects of Sampling on {Twitter} Trend Detection},
 url       = {https://www.aclweb.org/anthology/L16-1479},
 year      = {2016},
}

