% Afli and Way (2016), LT4DH workshop paper: OCR error correction ahead of MT
% for historical documents. The percent sign in the abstract is escaped so the
% field stays safe if a style ever typesets it.
@InProceedings{afli-way:2016:LT4DH,
  author    = {Afli, Haithem and Way, Andy},
  title     = {Integrating Optical Character Recognition and Machine Translation of Historical Documents},
  booktitle = {Proceedings of the Workshop on Language Technology Resources and Tools for Digital Humanities (LT4DH)},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {109--116},
  abstract  = {Machine Translation (MT) plays a critical role in expanding capacity in the
               translation industry.
               However, many valuable documents, including digital documents, are encoded in
               non-accessible formats for machine processing (e.g., Historical or Legal
               documents).
               Such documents must be passed through a process of Optical Character
               Recognition (OCR) to render the text suitable for MT.
               No matter how good the OCR is, this process introduces recognition
               errors, which often renders MT ineffective. In this paper, we propose a new OCR
               to MT framework based on adding a new OCR error correction module to enhance
               the overall quality of translation.
               Experimentation shows that our new system correction based on the combination
               of Language Modeling and Translation methods outperforms the baseline system by
               nearly 30\% relative improvement.},
  url       = {http://aclweb.org/anthology/W16-4015}
}

