% UDPipe system description paper from the CoNLL 2017 Shared Task proceedings.
% Conventions used in this record:
%   - accented letters are written as brace-wrapped BibTeX special characters
%     ({\'a}) so that sorting and label generation treat them as one letter;
%   - acronyms and proper names in the title are braced so that styles which
%     apply sentence casing do not lowercase them;
%   - month uses the predefined three-letter macro (unquoted);
%   - address holds the conference venue, as exported by the ACL Anthology.
@InProceedings{straka-strakova:2017:K17-3,
  author    = {Straka, Milan and Strakov{\'a}, Jana},
  title     = {Tokenizing, {POS} Tagging, Lemmatizing and Parsing {UD} 2.0 with {UDPipe}},
  booktitle = {Proceedings of the {CoNLL} 2017 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {88--99},
  abstract  = {Many natural language processing tasks, including the most advanced ones,
	routinely start by several basic processing steps -- tokenization and
	segmentation, most likely also POS tagging and lemmatization, and commonly
	parsing as well. A multilingual pipeline performing these steps can be trained
	using the Universal Dependencies project, which contains annotations of the
	described tasks for 50 languages in the latest release UD 2.0.
	We present an update to UDPipe, a simple-to-use pipeline processing CoNLL-U
	version 2.0 files, which performs these tasks for multiple languages without
	requiring additional external data.  We provide models for all 50 languages of
	UD 2.0, and furthermore, the pipeline can be trained easily using data in
	CoNLL-U format.  UDPipe is a standalone application in C++, with bindings
	available for Python, Java, C\# and Perl.
	In the CoNLL 2017 Shared Task: Multilingual Parsing from Raw Text to Universal
	Dependencies, UDPipe was the eighth best system, while achieving low running
	times and moderately sized models.},
  doi       = {10.18653/v1/K17-3009},
  url       = {http://www.aclweb.org/anthology/K17-3009}
}

