% CoNLL 2017 conference paper (ACL Anthology ID K17-1037).
@InProceedings{alishahi-barking-chrupala:2017:CoNLL,
  author    = {Alishahi, Afra and Barking, Marie and Chrupa{\l}a, Grzegorz},
  title     = {Encoding of phonology in a recurrent neural model of grounded speech},
  booktitle = {Proceedings of the 21st Conference on Computational Natural Language Learning (CoNLL 2017)},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {368--378},
  abstract  = {We study the representation and encoding of phonemes in a recurrent
               neural network model of grounded speech. We use a model which
               processes images and their spoken descriptions, and projects the
               visual and auditory representations into the same semantic space. We
               perform a number of analyses on how information about individual
               phonemes is encoded in the MFCC features extracted from the speech
               signal, and the activations of the layers of the model. Via
               experiments with phoneme decoding and phoneme discrimination we show
               that phoneme representations are most salient in the lower layers of
               the model, where low-level signals are processed at a fine-grained
               level, although a large amount of phonological information is retained at
               the top recurrent layer. We further find out that the
               attention mechanism following the top recurrent layer significantly
               attenuates encoding of phonology and makes the utterance embeddings
               much more invariant to synonymy. Moreover, a hierarchical clustering
               of phoneme representations learned by the network shows an
               organizational structure of phonemes similar to those proposed in
               linguistics.},
  url       = {http://aclweb.org/anthology/K17-1037}
}

