% Strubell, Verga, Belanger, McCallum (EMNLP 2017): ID-CNNs as a faster alternative to Bi-LSTMs for NER.
% The month field uses the predefined BibTeX macro (unquoted) so each style can render or localise it.
@InProceedings{strubell-EtAl:2017:EMNLP2017,
  author    = {Strubell, Emma  and  Verga, Patrick  and  Belanger, David  and  McCallum, Andrew},
  title     = {Fast and Accurate Entity Recognition with Iterated Dilated Convolutions},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {2670--2680},
  abstract  = {Today when many practitioners run basic NLP on the entire web and large-volume
	traffic, faster methods are paramount to saving time and energy costs.
	Recent advances in GPU hardware have led to the emergence of bi-directional
	LSTMs as a standard method for obtaining per-token vector representations
	serving as input to labeling tasks such as NER (often followed by prediction in
	a linear-chain CRF).
	Though expressive and accurate, these models fail to fully exploit GPU
	parallelism, limiting their computational efficiency.
	This paper proposes a faster alternative to Bi-LSTMs for NER: Iterated Dilated
	Convolutional Neural Networks (ID-CNNs), which have better capacity than
	traditional CNNs for large context and structured prediction.
	Unlike LSTMs whose sequential processing on sentences of length N requires O(N)
	time even in the face of parallelism, ID-CNNs permit fixed-depth convolutions
	to run in parallel across entire documents.
	We describe a distinct combination of network structure, parameter sharing and
	training procedures that enable dramatic 14-20x test-time speedups while
	retaining accuracy comparable to the Bi-LSTM-CRF. Moreover, ID-CNNs trained to
	aggregate context from the entire document are more accurate than Bi-LSTM-CRFs
	while attaining 8x faster test time speeds.},
  doi       = {10.18653/v1/D17-1283},
  url       = {https://www.aclweb.org/anthology/D17-1283}
}

