% Flint et al. (2017), WNUT workshop paper on normalising non-standard English words (ACL Anthology W17-4414).
@InProceedings{flint-EtAl:2017:WNUT,
  author    = {Flint, Emma and Ford, Elliot and Thomas, Olivia and Caines, Andrew and Buttery, Paula},
  title     = {A Text Normalisation System for Non-Standard {English} Words},
  booktitle = {Proceedings of the 3rd Workshop on Noisy User-generated Text},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {107--115},
  abstract  = {This paper investigates the problem of text normalisation; specifically, the
    normalisation of non-standard words (NSWs) in English. Non-standard words can
    be defined as those word tokens which do not have a dictionary entry, and
    cannot be pronounced using the usual letter-to-phoneme conversion rules; e.g.
    lbs, 99.3\%, \#EMNLP2017. NSWs pose a challenge to the proper functioning of
    text-to-speech technology, and the solution is to spell them out in such a way
    that they can be pronounced appropriately. We describe our four-stage
    normalisation system made up of components for detection, classification,
    division and expansion of NSWs. Performance is favourable compared to previous
    work in the field (Sproat et al. 2001, Normalization of non-standard words), as
    well as state-of-the-art text-to-speech software. Further, we update Sproat et
    al.'s NSW taxonomy, and create a more customisable system where users are able
    to input their own abbreviations and specify into which variety of English
    (currently available: British or American) they wish to normalise.},
  url       = {http://www.aclweb.org/anthology/W17-4414}
}

