% Workshop paper entry. `month` uses the predefined `jun` macro so the
% bibliography style decides how the month is printed; `{LEvel}` is braced
% to keep the booktitle's unusual capitalisation if a style recases it.
@InProceedings{li-EtAl:2018:W18-12,
  author    = {Li, Bofang and Drozd, Aleksandr and Liu, Tao and Du, Xiaoyong},
  title     = {Subword-level Composition Functions for Learning Word Embeddings},
  booktitle = {Proceedings of the Second Workshop on Subword/Character {LEvel} Models},
  month     = jun,
  year      = {2018},
  address   = {New Orleans},
  publisher = {Association for Computational Linguistics},
  pages     = {38--48},
  abstract  = {Subword-level information is crucial for capturing the meaning and morphology of words, especially for out-of-vocabulary entries. We propose CNN- and RNN-based subword-level composition functions for learning word embeddings, and systematically compare them with popular word-level and subword-level models (Skip-Gram and FastText). Additionally, we propose a hybrid training scheme in which a pure subword-level model is trained jointly with a conventional word-level embedding model based on lookup-tables. This increases the fitness of all types of subword-level word embeddings; the word-level embeddings can be discarded after training, leaving only compact subword-level representation with much smaller data volume. We evaluate these embeddings on a set of intrinsic and extrinsic tasks, showing that subword-level models have advantage on tasks related to morphology and datasets with high OOV rate, and can be combined with other types of embeddings.},
  url       = {http://www.aclweb.org/anthology/W18-1205},
}

