Workshop paper from WNUT 2016 (ACL Anthology id W16-3913). The month uses the
predefined BibTeX macro so that the bibliography style controls its rendering.
@InProceedings{su-greene-boydell:2016:WNUT,
  author    = {Su, Jing and Greene, Derek and Boydell, Oisin},
  title     = {Topic Stability over Noisy Sources},
  booktitle = {Proceedings of the 2nd Workshop on Noisy User-generated Text ({WNUT})},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {85--93},
  abstract  = {Topic modelling techniques such as LDA have recently been applied to speech
               transcripts and OCR output. These corpora may contain noisy or erroneous texts
               which may undermine topic stability. Therefore, it is important to know how
               well a topic modelling algorithm will perform when applied to noisy data. In
               this paper we show that different types of textual noise can have diverse
               effects on the stability of topic models. On the other hand, topic model
               stability is not consistent with the same type but different levels of noise.
               We introduce a dictionary filtering approach to address this challenge, with
               the result that a topic model with the correct number of topics is always
               identified across different levels of noise.},
  url       = {http://aclweb.org/anthology/W16-3913},
}

