% Skantze (2017), SIGdial 2017 paper on continuous turn-taking prediction with LSTM RNNs.
% The month uses the standard three-letter macro so styles can format/localise it;
% acronyms are brace-protected so sentence-casing styles keep their capitals;
% the umlaut is written as a LaTeX escape so classic 8-bit BibTeX handles it.
@InProceedings{skantze:2017:W17-55,
  author    = {Skantze, Gabriel},
  title     = {Towards a General, Continuous Model of Turn-taking in Spoken Dialogue using {LSTM} Recurrent Neural Networks},
  booktitle = {Proceedings of the 18th Annual {SIGdial} Meeting on Discourse and Dialogue},
  month     = aug,
  year      = {2017},
  address   = {Saarbr{\"u}cken, Germany},
  publisher = {Association for Computational Linguistics},
  pages     = {220--230},
  abstract  = {Previous models of turn-taking have mostly been trained for specific
	turn-taking decisions, such as discriminating between turn shifts and turn
	retention in pauses. In this paper, we present a predictive, continuous model
	of turn-taking using Long Short-Term Memory (LSTM) Recurrent Neural Networks
	(RNN). The model is trained on human-human dialogue data to predict upcoming
	speech activity in a future time window. We show how this general model can be
	applied to two different tasks that it was not specifically trained for. First,
	to predict whether a turn-shift will occur or not in pauses, where the model
	achieves a better performance than human observers, and better than results
	achieved with more traditional models. Second, to make a prediction at speech
	onset whether the utterance will be a short backchannel or a longer utterance.
	Finally, we show how the hidden layer in the network can be used as a feature
	vector for turn-taking decisions in a human-robot interaction scenario.},
  url       = {http://aclweb.org/anthology/W17-5527},
}

