% WiNER corpus paper (IJCNLP 2017, Volume 1: Long Papers, pp. 413--422).
% - Braces in the title protect the acronym and the proper noun from
%   sentence-casing bibliography styles.
% - month uses the predefined macro so the style decides how to render it.
% - NOTE(review): the second author's given name is kept exactly as found
%   ("Phillippe"); confirm the spelling against the published paper.
@InProceedings{ghaddar-langlais:2017:I17-1,
  author    = {Ghaddar, Abbas and Langlais, Phillippe},
  title     = {{WiNER}: A {Wikipedia} Annotated Corpus for Named Entity Recognition},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  month     = nov,
  year      = {2017},
  address   = {Taipei, Taiwan},
  publisher = {Asian Federation of Natural Language Processing},
  pages     = {413--422},
  abstract  = {We revisit the idea of mining Wikipedia in order to generate named-entity
               annotations. We propose a new methodology that we applied to English Wikipedia
               to build WiNER, a large, high quality, annotated corpus. We evaluate its
               usefulness on 6 NER tasks, comparing 4 popular state-of-the art approaches. We
               show that LSTM-CRF is the approach that benefits the most from our corpus. We
               report impressive gains with this model when using a small portion of WiNER on
               top of the CONLL training material. Last, we propose a simple but efficient
               method for exploiting the full range of WiNER, leading to further improvements.},
  url       = {http://www.aclweb.org/anthology/I17-1042}
}

