@inproceedings{peters-EtAl:2017:Long,
  author    = {Peters, Matthew and Ammar, Waleed and Bhagavatula, Chandra and Power, Russell},
  title     = {Semi-supervised Sequence Tagging with Bidirectional Language Models},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {1756--1765},
  abstract  = {Pre-trained word embeddings learned from unlabeled text have become a
               standard component of neural network architectures for NLP tasks.
               However, in most cases, the recurrent network that operates on
               word-level representations to produce context sensitive representations
               is trained on relatively little labeled data. In this paper, we
               demonstrate a general semi-supervised approach for adding pre-trained
               context embeddings from bidirectional language models to NLP systems
               and apply it to sequence labeling tasks. We evaluate our model on two
               standard datasets for named entity recognition (NER) and chunking, and
               in both cases achieve state of the art results, surpassing previous
               systems that use other forms of transfer or joint learning with
               additional labeled data and task specific gazetteers.},
  doi       = {10.18653/v1/P17-1161},
  url       = {http://aclweb.org/anthology/P17-1161},
}

