% Stratos (2017), EMNLP 2017: sub-character (jamo) architecture for Korean.
% The percent sign in the abstract is escaped so LaTeX does not read it as a
% comment start when the field is typeset; month uses the standard macro.
@InProceedings{stratos:2017:EMNLP2017,
  author    = {Stratos, Karl},
  title     = {A Sub-Character Architecture for {Korean} Language Processing},
  booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {721--726},
  abstract  = {We introduce a novel sub-character architecture that exploits a unique
               compositional structure of the Korean language. Our method decomposes each
               character into a small set of primitive phonetic units called jamo letters from
               which character- and word-level representations are induced. The jamo letters
               divulge syntactic and semantic information that is difficult to access with
               conventional character-level units. They greatly alleviate the data sparsity
               problem, reducing the observation space to 1.6\% of the original while
               increasing accuracy in our experiments. We apply our architecture to dependency
               parsing and achieve dramatic improvement over strong lexical baselines.},
  url       = {https://www.aclweb.org/anthology/D17-1075},
}

