% Moore, Caines, Graham and Buttery (2016): COLING 2016 technical paper on
% speech-unit delimitation in spoken learner English.
@InProceedings{moore-EtAl:2016:COLING,
  author    = {Moore, Russell and Caines, Andrew and Graham, Calbert and Buttery, Paula},
  title     = {Automated speech-unit delimitation in spoken learner {English}},
  booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {782--793},
  abstract  = {In order to apply computational linguistic analyses and pass information to
    downstream applications, transcriptions of speech obtained via automatic
    speech recognition (ASR) need to be divided into smaller meaningful units, in a
    task we refer to as `speech-unit (SU) delimitation'. We closely recreate
    the automatic delimitation system described by Lee and Glass (2012),
    `Sentence detection using multiple annotations', Proceedings of INTERSPEECH,
    which combines a prosodic model, language model and speech-unit length model in
    log-linear fashion. Since state-of-the-art natural language processing (NLP)
    tools have been developed to deal with written text and its characteristic
    sentence-like units, SU delimitation helps bridge the gap between ASR and NLP,
    by normalising spoken data into a more canonical format. Previous work has
    focused on native speaker recordings; we test the system of Lee and Glass
    (2012) on non-native speaker (or `learner') data, achieving performance
    above the state-of-the-art. We also consider alternative evaluation metrics
    which move away from the idea of a single `truth' in SU delimitation, and
    frame this work in the context of downstream NLP applications.},
  url       = {http://aclweb.org/anthology/C16-1075}
}

