% Workshop paper on distantly supervised POS tagging of Hittite (URL ID W17-2213).
% Conventions used in this entry:
%   - accented letters are written as BibTeX special characters, e.g. {\"o},
%     so that sorting and label generation treat them as a single letter;
%   - acronyms and proper nouns in the title are brace-protected against
%     sentence-casing styles;
%   - month uses the predefined three-letter macro (unquoted);
%   - LaTeX-special characters in free text (e.g. the percent sign) are escaped.
@InProceedings{sukhareva-EtAl:2017:LaTeCH-CLfL,
  author    = {Sukhareva, Maria and Fuscagni, Francesco and Daxenberger, Johannes and G{\"o}rke, Susanne and Prechel, Doris and Gurevych, Iryna},
  title     = {Distantly Supervised {POS} Tagging of Low-Resource Languages under Extreme Data Sparsity: The Case of {Hittite}},
  booktitle = {Proceedings of the Joint {SIGHUM} Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature},
  month     = aug,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {95--104},
  abstract  = {This paper presents a statistical approach to automatic morphosyntactic
    annotation of Hittite transcripts. Hittite is an extinct Indo-European language
    using the cuneiform script. There are currently no morphosyntactic annotations
    available for Hittite, so we explored methods of distant supervision. The
    annotations were projected from parallel German translations of the Hittite
    texts. In order to reduce data sparsity, we applied stemming of German and
    Hittite texts. As there is no off-the-shelf Hittite stemmer, a stemmer for
    Hittite was developed for this purpose. The resulting annotation projections
    were used to train a POS tagger, achieving an accuracy of 69\% on a test sample.
    To our knowledge, this is the first attempt of statistical POS tagging of a
    cuneiform language.},
  url       = {http://www.aclweb.org/anthology/W17-2213}
}

