@comment{Workshop paper on preserving and placing XML markup in SMT output.
  The umlaut in the author name is written as a BibTeX special character
  (brace group starting with the accent command) so sorting and label
  generation treat it as a single letter. The month uses the standard
  three-letter macro so that styles can abbreviate or localize it.}
@InProceedings{muller:2017:DiscoMT,
  author    = {M{\"u}ller, Mathias},
  title     = {Treatment of Markup in Statistical Machine Translation},
  booktitle = {Proceedings of the Third Workshop on Discourse in Machine Translation},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {36--46},
  abstract  = {We present work on handling XML markup in Statistical Machine Translation
               (SMT). The methods we propose can be used to effectively preserve markup (for
               instance inline formatting or structure) and to place markup correctly in a
               machine-translated segment. We evaluate our approaches with parallel data that
               naturally contains markup or where markup was inserted to create synthetic
               examples. In our experiments, hybrid reinsertion has proven the most accurate
               method to handle markup, while alignment masking and alignment reinsertion
               should be regarded as viable alternatives. We provide implementations of all
               the methods described and they are freely available as an open-source
               framework.},
  url       = {http://aclweb.org/anthology/W17-4804}
}

