% Workshop paper (LT4DH, COLING 2016) on Named Entity Recognition for Latin.
% Title braces protect the proper noun from style-driven sentence casing;
% month uses the predefined macro so styles can localise or abbreviate it.
@InProceedings{erdmann-EtAl:2016:LT4DH,
  author    = {Erdmann, Alexander and Brown, Christopher and Joseph, Brian and Janse, Mark and Ajaka, Petra and Elsner, Micha and de Marneffe, Marie-Catherine},
  title     = {Challenges and Solutions for {Latin} Named Entity Recognition},
  booktitle = {Proceedings of the Workshop on Language Technology Resources and Tools for Digital Humanities (LT4DH)},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {85--93},
  abstract  = {Although spanning thousands of years and genres as diverse as liturgy,
    historiography, lyric and other forms of prose and poetry, the body of Latin
    texts is still relatively sparse compared to English. Data sparsity in Latin
    presents a number of challenges for traditional Named Entity Recognition
    techniques. Solving such challenges and enabling reliable Named Entity
    Recognition in Latin texts can facilitate many down-stream applications, from
    machine translation to digital historiography, enabling Classicists,
    historians, and archaeologists for instance, to track the relationships of
    historical persons, places, and groups on a large scale. This paper presents
    the first annotated corpus for evaluating Named Entity Recognition in Latin, as
    well as a fully supervised model that achieves over 90\% F-score on a held-out
    test set, significantly outperforming a competitive baseline. We also present a
    novel active learning strategy that predicts how many and which sentences need
    to be annotated for named entities in order to attain a specified degree of
    accuracy when recognizing named entities automatically in a given text. This
    maximizes the productivity of annotators while simultaneously controlling
    quality.},
  url       = {http://aclweb.org/anthology/W16-4012}
}

