% Zhou, Zhang & He (EACL 2017, long papers): non-parametric Bayesian mixture
% model with word embeddings for event extraction from Twitter.
% Notes for maintainers:
%   - month uses the predefined BibTeX macro (unquoted) so styles can
%     abbreviate/localise it.
%   - proper nouns in the title are brace-protected against sentence-casing.
%   - the percent sign in the abstract is escaped; a bare one would start a
%     TeX comment in styles that typeset the abstract.
@InProceedings{zhou-zhang-he:2017:EACLlong,
  author    = {Zhou, Deyu and Zhang, Xuan and He, Yulan},
  title     = {Event extraction from {Twitter} using Non-Parametric {Bayesian} Mixture Model with Word Embeddings},
  booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {808--817},
  abstract  = {To extract structured representations of newsworthy events from Twitter,
	unsupervised models typically assume that tweets involving the same named
	entities and expressed using similar words are likely to belong to the same
	event. Hence, they group tweets into clusters based on the co-occurrence
	patterns of named entities and topical keywords. However, there are two main
	limitations. First, they require the number of events to be known beforehand,
	which is not realistic in practical applications. Second, they don't recognise
	that the same named entity might be referred to by multiple mentions and tweets
	using different mentions would be wrongly assigned to different events. To
	overcome these limitations, we propose a non-parametric Bayesian mixture model
	with word embeddings for event extraction, in which the number of events can be
	inferred automatically and the issue of lexical variations for the same named
	entity can be dealt with properly. Our model has been evaluated on three
	datasets with sizes ranging between 2,499 and over 60 million tweets.
	Experimental results show that our model outperforms the baseline approach on
	all datasets by 5--8\% in F-measure.},
  url       = {http://www.aclweb.org/anthology/E17-1076}
}

