@inproceedings{sun:2016:COLING,
  author    = {Sun, Xu},
  title     = {Asynchronous Parallel Learning for Neural Networks and Structured Models with Dense Features},
  booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {192--202},
  abstract  = {Existing asynchronous parallel learning methods are only for the sparse feature
	models, and they face new challenges for the dense feature models like neural
	networks (e.g., LSTM, RNN). The problem for dense features is that asynchronous
	parallel learning brings gradient errors derived from overwrite actions. We
	show that gradient errors are very common and inevitable. Nevertheless, our
	theoretical analysis shows that the learning process with gradient errors can
	still be convergent towards the optimum of objective functions for many
	practical applications. Thus, we propose a simple method \emph{AsynGrad} for
	asynchronous parallel learning with gradient error. Base on various dense
	feature models (LSTM, dense-CRF) and various NLP tasks, experiments show that
	\emph{AsynGrad} achieves substantial improvement on training speed, and without
	any loss on accuracy.},
  url       = {https://aclanthology.org/C16-1019},
}

