% Hornby, Taylor & Park (2017): UALing system description paper in the
% CoNLL 2017 Shared Task proceedings (ACL Anthology ID K17-3021).
% Notes on this entry:
%   - `month` uses the predefined BibTeX macro (unquoted) so styles can format it.
%   - Proper nouns / acronyms in title and booktitle are brace-protected against
%     sentence-casing styles.
%   - The abstract is kept free of paper-local macros and \cite commands so it
%     can be typeset in any document. The shared-task name replaces a private
%     macro from the paper source; wording inferred from `booktitle` -- TODO confirm.
%   - `address` holds the conference venue, following ACL Anthology convention.
@InProceedings{hornby-taylor-park:2017:K17-3,
  author    = {Hornby, Ryan and Taylor, Clark and Park, Jungyeul},
  title     = {Corpus Selection Approaches for Multilingual Parsing from Raw Text to {Universal Dependencies}},
  booktitle = {Proceedings of the {CoNLL} 2017 Shared Task: Multilingual Parsing from Raw Text to {Universal Dependencies}},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {198--206},
  abstract  = {This paper describes UALing's approach to the CoNLL 2017 UD Shared
               Task using corpus selection techniques to reduce training data
               size. The methodology is simple: we use similarity measures to
               select a corpus from available training data (even from multiple
               corpora for surprise languages) and use the resulting corpus to
               complete the parsing task. The training and parsing is done with
               the baseline UDPipe system (Straka et al., 2016). While our
               approach reduces the size of training data significantly, it
               retains performance within 0.5\% of the baseline system. Due to
               the reduction in training data size, our system performs faster
               than the na{\"i}ve, complete corpus method. Specifically, our
               system runs in less than 10 minutes, ranking it among the fastest
               entries for this task. Our system is available at
               \url{https://github.com/CoNLL-UD-2017/UALING}.},
  url       = {http://www.aclweb.org/anthology/K17-3021}
}

