@inproceedings{yang-lepage:2016:WAT2016,
  author    = {Yang, Wei and Lepage, Yves},
  title     = {Improving Patent Translation using Bilingual Term Extraction and Re-tokenization for {Chinese--Japanese}},
  booktitle = {Proceedings of the 3rd Workshop on Asian Translation ({WAT2016})},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {194--202},
  abstract  = {Unlike European languages, many Asian languages like Chinese and Japanese do
	not have typographic boundaries in written system. Word segmentation
	(tokenization) that break sentences down into individual words (tokens) is
	normally treated as the first step for machine translation (MT). For Chinese
	and Japanese, different rules and segmentation tools lead different
	segmentation results in different level of granularity between Chinese and
	Japanese. To improve the translation accuracy, we adjust and balance the
	granularity of segmentation results around terms for Chinese--Japanese patent
	corpus for training translation model. In this paper, we describe a statistical
	machine translation (SMT) system which is built on re-tokenized
	Chinese--Japanese patent training corpus using extracted bilingual multi-word
	terms.},
  url       = {https://aclanthology.org/W16-4619},
}

