@InProceedings{oka-kono:2016:LT4DH,
  author    = {Oka, Teruaki and Kono, Tomoaki},
  title     = {Original-Transcribed Text Alignment for {Manyosyu} Written by {Old Japanese} Language},
  booktitle = {Proceedings of the Workshop on Language Technology Resources and Tools for Digital Humanities ({LT4DH})},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  pages     = {35--44},
  abstract  = {We are constructing an annotated diachronic corpora of the Japanese language.
	In part of this work, we construct a corpus of Manyosyu, which is an old
	Japanese poetry anthology. In this paper, we describe how to align the
	transcribed text and its original text semiautomatically to be able to
	cross-reference them in our Manyosyu corpus. Although we align the original
	characters to the transcribed words manually, we preliminarily align the
	transcribed and original characters by using an unsupervised automatic alignment
	technique of statistical machine translation to alleviate the work. We found
	that automatic alignment achieves an F1-measure of 0.83; thus, each poem has
	1--2 alignment errors. However, finding these errors and modifying them are
	less work intensive and more efficient than fully manual annotation. The
	alignment probabilities can be utilized in this modification. Moreover, we found
	that we can locate the uncertain transcriptions in our corpus and compare them
	to other transcriptions, by using the alignment probabilities.},
  url       = {http://aclweb.org/anthology/W16-4006}
}

