@inproceedings{argueta-chiang:2017:EACLlong,
  author    = {Argueta, Arturo and Chiang, David},
  title     = {Decoding with Finite-State Transducers on {GPUs}},
  booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {1044--1052},
  abstract  = {Weighted finite automata and transducers (including hidden Markov models and
	conditional random fields) are widely used in natural language processing (NLP)
	to perform tasks such as morphological analysis, part-of-speech tagging,
	chunking, named entity recognition, speech recognition, and others.
	Parallelizing finite state algorithms on graphics processing units (GPUs) would
	benefit many areas of NLP. Although researchers have implemented GPU versions
	of basic graph algorithms, no work, to our knowledge, has been done on GPU
	algorithms for weighted finite automata. We introduce a GPU implementation of
	the Viterbi and forward-backward algorithm, achieving speedups of up to 4x over
	our serial implementations running on different computer architectures and
	3335x over widely used tools such as OpenFST.},
  url       = {http://www.aclweb.org/anthology/E17-1098},
}

