% Dai and Cai (2017): workshop paper on glyph-aware embeddings of Chinese characters.
% Braces inside title/booktitle protect proper nouns and acronyms from style recasing;
% month uses the predefined BibTeX macro so that styles can format it.
@InProceedings{dai-cai:2017:SCLeM,
  author    = {Dai, Falcon  and  Cai, Zheng},
  title     = {Glyph-aware Embedding of {Chinese} Characters},
  booktitle = {Proceedings of the First Workshop on Subword and Character Level Models in {NLP}},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {64--69},
  abstract  = {Given the advantage and recent success of English character-level and
    subword-unit models in several NLP tasks, we consider the equivalent modeling
    problem for Chinese. Chinese script is logographic and many Chinese logograms
    are composed of common substructures that provide semantic, phonetic and
    syntactic hints. In this work, we propose to explicitly incorporate the visual
    appearance of a character's glyph in its representation, resulting in a novel
    glyph-aware embedding of Chinese characters. Being inspired by the success of
    convolutional neural networks in computer vision, we use them to incorporate
    the spatio-structural patterns of Chinese glyphs as rendered in raw pixels. In
    the context of two basic Chinese NLP tasks of language modeling and word
    segmentation, the model learns to represent each character's task-relevant
    semantic and syntactic information in the character-level embedding.},
  url       = {http://www.aclweb.org/anthology/W17-4109},
}