% Mishra and Diesner (2016): workshop paper in the WNUT 2016 proceedings on
% semi-supervised named entity recognition for noisy text.
% In the abstract, percent signs and the tilde are escaped so the field stays
% safe if a bibliography style typesets it.
@InProceedings{mishra-diesner:2016:WNUT,
  author    = {Mishra, Shubhanshu and Diesner, Jana},
  title     = {Semi-supervised Named Entity Recognition in noisy-text},
  booktitle = {Proceedings of the 2nd Workshop on Noisy User-generated Text ({WNUT})},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {203--212},
  abstract  = {Many of the existing Named Entity Recognition (NER) solutions are built based
               on news corpus data with proper syntax. These solutions might not lead to
               highly accurate results when being applied to noisy, user generated data, e.g.,
               tweets, which can feature sloppy spelling, concept drift, and limited
               contextualization of terms and concepts due to length constraints. The models
               described in this paper are based on linear chain conditional random fields
               (CRFs), use the BIEOU encoding scheme, and leverage random feature dropout for
               up-sampling the training data. The considered features include word clusters
               and pre-trained distributed word representations, updated gazetteer features,
               and global context predictions. The latter feature allows for ingesting the
               meaning of new or rare tokens into the system via unsupervised learning and for
               alleviating the need to learn lexicon based features, which usually tend to be
               high dimensional. In this paper, we report on the solution [ST] we submitted to
               the WNUT 2016 NER shared task. We also present an improvement over our original
               submission [SI], which we built by using semi-supervised learning on labelled
               training data and pre-trained resourced constructed from unlabelled tweet data.
               Our ST solution achieved an F1 score of 1.2\% higher than the baseline (35.1\%
               F1) for the task of extracting 10 entity types. The SI resulted in an increase
               of 8.2\% in F1 score over the base-line (7.08\% over ST). Finally, the SI
               model's evaluation on the test data achieved a F1 score of 47.3\% ({\textasciitilde}1.15\%
               increase over the 2nd best submitted solution). Our experimental setup and
               results are available as a standalone twitter NER tool at
               https://github.com/napsternxg/TwitterNER.},
  url       = {http://aclweb.org/anthology/W16-3927}
}

