% Workshop paper (ACL Anthology W17-5403). The month uses the standard
% three-letter macro, and the percent sign in the abstract is escaped so that
% styles which typeset the abstract do not read it as a LaTeX comment.
@InProceedings{peters-dehdari-vangenabith:2017:BLGNLP2017,
  author    = {Peters, Ben and Dehdari, Jon and van Genabith, Josef},
  title     = {Massively Multilingual Neural Grapheme-to-Phoneme Conversion},
  booktitle = {Proceedings of the First Workshop on Building Linguistically Generalizable {NLP} Systems},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {19--26},
  abstract  = {Grapheme-to-phoneme conversion (g2p) is necessary for text-to-speech and
               automatic speech recognition systems. Most g2p systems are monolingual: they
               require language-specific data or handcrafting of rules. Such systems are
               difficult to extend to low resource languages, for which data and handcrafted
               rules are not available. As an alternative, we present a neural
               sequence-to-sequence approach to g2p which is trained on
               spelling--pronunciation pairs in hundreds of languages. The system shares a
               single encoder and decoder across all languages, allowing it to utilize the
               intrinsic similarities between different writing systems. We show an 11\%
               improvement in phoneme error rate over an approach based on adapting
               high-resource monolingual g2p models to low-resource languages. Our model is
               also much more compact relative to previous approaches.},
  url       = {http://www.aclweb.org/anthology/W17-5403},
}

