% Mieskes (2017): workshop paper at the First ACL Workshop on Ethics in NLP
% (ACL Anthology ID W17-1603). Acronyms are braced so sentence-casing styles
% keep them upper-case; the month uses the standard three-letter macro.
@InProceedings{mieskes:2017:EthNLP,
  author    = {Mieskes, Margot},
  title     = {A Quantitative Study of Data in the {NLP} community},
  booktitle = {Proceedings of the First {ACL} Workshop on Ethics in Natural Language Processing},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {23--29},
  abstract  = {We present results on a quantitative analysis of publications in the NLP domain
	on collecting, publishing and availability of research data. We find that a
	wide range of publications rely on data crawled from the web, but few give
	details on how potentially sensitive data was treated. Additionally, we find
	that while links to repositories of data are given, they often do not work even
	a short time after publication. We put together several suggestions on how to
	improve this situation based on publications from the NLP domain, but also
	other research areas.},
  url       = {http://www.aclweb.org/anthology/W17-1603}
}

