% Workshop paper by Labeau and Allauzen (2017); ACL Anthology ID W17-4101.
% The month uses the predefined macro so the bibliography style controls its rendering.
@InProceedings{labeau-allauzen:2017:SCLeM,
  author    = {Labeau, Matthieu  and  Allauzen, Alexandre},
  title     = {Character and Subword-Based Word Representation for Neural Language Modeling Prediction},
  booktitle = {Proceedings of the First Workshop on Subword and Character Level Models in {NLP}},
  month     = sep,
  year      = {2017},
  address   = {Copenhagen, Denmark},
  publisher = {Association for Computational Linguistics},
  pages     = {1--13},
  abstract  = {Most of neural language models use different kinds of embeddings for
	  word prediction. While word embeddings can be associated to each
	  word in the vocabulary or derived from characters as well as
	  factored morphological decomposition, these word representations are
	  mainly used to parametrize the input, i.e. the context of
	  prediction.  This work investigates the effect of using subword
	  units (character and factored morphological decomposition) to build
	  output representations for neural language modeling. We present a
	  case study on Czech, a morphologically-rich language, experimenting
	  with different input and output representations.  When working with
	  the full training vocabulary, despite unstable training, our
	  experiments show that augmenting the output word representations
	  with character-based embeddings can significantly improve the
	  performance of the model. Moreover, reducing the size of the output
	  look-up table, to let the character-based embeddings represent rare
	  words, brings further improvement.},
  url       = {http://www.aclweb.org/anthology/W17-4101}
}

