@inproceedings{schofield-magnusson-mimno:2017:EACLshort,
  author    = {Schofield, Alexandra and Magnusson, M{\aa}ns and Mimno, David},
  title     = {Pulling Out the Stops: Rethinking Stopword Removal for Topic Models},
  booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {432--436},
  abstract  = {It is often assumed that topic models benefit from the use of a manually
	curated stopword list. Constructing this list is time-consuming and often
	subject to user judgments about what kinds of words are important to the model
	and the application. Although stopword removal clearly affects which word types
	appear as most probable terms in topics, we argue that this improvement is
	superficial, and that topic inference benefits little from the practice of
	removing stopwords beyond very frequent terms. Removing corpus-specific
	stopwords after model inference is more transparent and produces similar
	results to removing those words prior to inference.},
  url       = {http://www.aclweb.org/anthology/E17-2069},
}

