% Elsner and Shain (EMNLP 2017): unsupervised LSTM-based speech segmentation paper.
% The month uses the standard three-letter macro so the style controls its rendering.
@InProceedings{elsner-shain:2017:EMNLP2017,
  author    = {Elsner, Micha and Shain, Cory},
  title     = {Speech segmentation with a neural encoder model of working memory},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {1070--1080},
  abstract  = {We present the first unsupervised LSTM speech segmenter as a cognitive model of
               the acquisition of words from unsegmented input. Cognitive biases toward
               phonological and syntactic predictability in speech are rooted in the
               limitations of human memory (Baddeley et al., 1998); compressed representations
               are easier to acquire and retain in memory. To model the biases introduced by
               these memory limitations, our system uses an LSTM-based encoder-decoder with a
               small number of hidden units, then searches for a segmentation that minimizes
               autoencoding loss. Linguistically meaningful segments (e.g. words) should share
               regular patterns of features that facilitate decoder performance in comparison
               to random segmentations, and we show that our learner discovers these patterns
               when trained on either phoneme sequences or raw acoustics. To our knowledge,
               ours is the first fully unsupervised system to be able to segment both symbolic
               and acoustic representations of speech.},
  doi       = {10.18653/v1/D17-1112},
  url       = {https://www.aclweb.org/anthology/D17-1112},
}

