@InProceedings{ruzsics-samardzic:2017:CoNLL,
  author    = {Ruzsics, Tatyana  and  Samardzic, Tanja},
  title     = {Neural Sequence-to-sequence Learning of Internal Word Structure},
  booktitle = {Proceedings of the 21st Conference on Computational Natural Language Learning (CoNLL 2017)},
  month     = {August},
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {184--194},
  abstract  = {Learning internal word structure has recently been recognized as an important
               step in various multilingual processing tasks and in theoretical language
               comparison. In this paper, we present a neural encoder-decoder model for
               learning canonical morphological segmentation. Our model combines
               character-level sequence-to-sequence transformation with a language model over
               canonical segments. We obtain up to 4\% improvement over a strong
               character-level encoder-decoder baseline for three languages. Our model
               outperforms the previous state-of-the-art for two languages, while eliminating
               the need for external resources such as large dictionaries. Finally, by
               comparing the performance of encoder-decoder and classical statistical machine
               translation systems trained with and without corpus counts, we show that
               including corpus counts is beneficial to both approaches.},
  url       = {http://aclweb.org/anthology/K17-1020}
}

