@inproceedings{L16-1676,
 abstract = {In this paper we present newly developed inflectional lexcions and manually annotated corpora of Croatian and Serbian. We introduce hrLex and srLex - two freely available inflectional lexicons of Croatian and Serbian - and describe the process of building these lexicons, supported by supervised machine learning techniques for lemma and paradigm prediction. Furthermore, we introduce hr500k, a manually annotated corpus of Croatian, 500 thousand tokens in size. We showcase the three newly developed resources on the task of morphosyntactic annotation of both languages by using a recently developed CRF tagger. We achieve best results yet reported on the task for both languages, beating the HunPos baseline trained on the same datasets by a wide margin.
},
 address = {Portorož, Slovenia},
 author = {Nikola Ljubešić and Filip Klubička and Željko Agić and Ivo-Pavao Jazbec},
 booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)},
 month = {May},
 pages = {4264--4270},
 publisher = {European Language Resources Association (ELRA)},
 title = {New Inflectional Lexicons and Training Corpora for Improved Morphosyntactic Annotation of Croatian and Serbian},
 url = {https://www.aclweb.org/anthology/L16-1676},
 year = {2016}
}

