% Conference paper entry. The month uses the predefined BibTeX macro (unquoted),
% and braces in title/booktitle protect words whose capitalisation must survive
% style recasing. The abstract is reproduced verbatim.
@InProceedings{takeda-komatani:2016:COLING,
  author    = {Takeda, Ryu and Komatani, Kazunori},
  title     = {{Bayesian} Language Model based on Mixture of Segmental Contexts for Spontaneous Utterances with Unexpected Words},
  booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {161--170},
  abstract  = {This paper describes a Bayesian language model for predicting spontaneous
	utterances. 
	People sometimes say unexpected words, such as fillers or hesitations, 
	that cause the miss-prediction of words in normal N-gram models. 
	Our proposed model considers mixtures of possible segmental contexts, 
	that is, a kind of context-word selection. 
	It can reduce negative effects caused by unexpected words 
	because it represents conditional occurrence probabilities of a word as
	weighted mixtures of possible segmental contexts.
	The tuning of mixture weights is the key issue in this approach as the segment
	patterns becomes numerous, thus we resolve it by using Bayesian model. 
	The generative process is achieved by combining the stick-breaking process 
	and the process used in the variable order Pitman-Yor language model. 
	Experimental evaluations revealed that our model outperformed contiguous N-gram
	models in terms of perplexity for noisy text including hesitations.},
  url       = {http://aclweb.org/anthology/C16-1016}
}

