@inproceedings{wolf-chaumond-delangue:2018:Short,
  author    = {Wolf, Thomas and Chaumond, Julien and Delangue, Clement},
  title     = {Continuous Learning in a Hierarchical Multiscale Neural Network},
  booktitle = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  month     = jul,
  year      = {2018},
  address   = {Melbourne, Australia},
  publisher = {Association for Computational Linguistics},
  pages     = {1--7},
  abstract  = {We reformulate the problem of encoding a multi-scale representation of a sequence in a language model by casting it in a continuous learning framework. We propose a hierarchical multi-scale language model in which short time-scale dependencies are encoded in the hidden state of a lower-level recurrent neural network while longer time-scale dependencies are encoded in the dynamic of the lower-level network by having a meta-learner update the weights of the lower-level neural network in an online meta-learning fashion. We use elastic weights consolidation as a higher-level to prevent catastrophic forgetting in our continuous learning framework.},
  doi       = {10.18653/v1/P18-2001},
  url       = {http://www.aclweb.org/anthology/P18-2001},
}

