% BSNLP 2017 workshop paper: adapting a South Slavic tagger to non-standard Slovene text (ACL Anthology W17-1410).
@InProceedings{ljubevsic-erjavec-fivser:2017:BSNLP,
  author    = {Ljube{\v{s}}i{\'c}, Nikola and Erjavec, Toma{\v{z}} and Fi{\v{s}}er, Darja},
  title     = {Adapting a State-of-the-Art Tagger for {South} {Slavic} Languages to Non-Standard Text},
  booktitle = {Proceedings of the 6th Workshop on Balto-Slavic Natural Language Processing},
  month     = apr,
  year      = {2017},
  address   = {Valencia, Spain},
  publisher = {Association for Computational Linguistics},
  pages     = {60--68},
  abstract  = {In this paper we present the adaptations of a state-of-the-art tagger for South
	Slavic languages to non-standard texts on the example of the Slovene language.
	We investigate the impact of introducing in-domain training data as well as
	additional supervision through external resources or tools like word clusters
	and word normalization. We remove more than half of the error of the standard
	tagger when applied to non-standard texts by training it on a combination of
	standard and non-standard training data, while enriching the data
	representation with external resources removes additional 11 percent of the
	error. The final configuration achieves tagging accuracy of 87.41\% on the full
	morphosyntactic description, which is, nevertheless, still quite far from the
	accuracy of 94.27\% achieved on standard text.},
  url       = {http://www.aclweb.org/anthology/W17-1410}
}

