% Workshop paper, ACL Anthology ID W17-4122. Text outside entries is ignored by BibTeX.
@InProceedings{nguyen-brooke-baldwin:2017:SCLeM,
  author    = {Nguyen, Viet and Brooke, Julian and Baldwin, Timothy},
  title     = {Sub-character Neural Language Modelling in {Japanese}},
  booktitle = {Proceedings of the First Workshop on Subword and Character Level Models in NLP},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {148--153},
  abstract  = {In East Asian languages such as Japanese and Chinese, the semantics of
               a character are (somewhat) reflected in its sub-character
               elements. This paper examines the effect of using sub-characters for
               language modeling in Japanese. This is achieved by decomposing
               characters according to a range of character decomposition datasets,
               and training a neural language model over variously decomposed
               character representations. Our results indicate that language modelling
               can be improved through the inclusion of sub-characters, though this
               result depends on a good choice of decomposition dataset and the
               appropriate granularity of decomposition.},
  doi       = {10.18653/v1/W17-4122},
  url       = {http://www.aclweb.org/anthology/W17-4122}
}

