% COLING 2016 technical paper (ACL Anthology ID C16-1033).
% Braced words in title/booktitle are case-protected against style recasing;
% `address` holds the conference venue, following the ACL Anthology export convention.
@InProceedings{more-tsarfaty:2016:COLING,
  author    = {More, Amir and Tsarfaty, Reut},
  title     = {Data-Driven Morphological Analysis and Disambiguation for Morphologically Rich Languages and {Universal Dependencies}},
  booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {337--348},
  abstract  = {Parsing texts into universal dependencies (UD) in realistic scenarios requires
    infrastructure for the morphological analysis and disambiguation (MA\&D) of
    typologically different languages as a first tier. MA\&D is particularly
    challenging in morphologically rich languages (MRLs), where the ambiguous
    space-delimited tokens ought to be disambiguated with respect to their
    constituent morphemes, each morpheme carrying its own tag and a rich set
    features. Here we present a novel, language-agnostic, framework for MA\&D, based
    on a transition system with two variants --- word-based and morpheme-based ---
    and a dedicated transition to mitigate the biases of variable-length morpheme
    sequences. Our experiments on a Modern Hebrew case study show state of the art
    results, and we show that the morpheme-based MD consistently outperforms our
    word-based variant. We further illustrate the utility and multilingual coverage
    of our framework by morphologically analyzing and disambiguating the large set
    of languages in the UD treebanks.},
  url       = {https://aclanthology.org/C16-1033}
}

