@inproceedings{oka-kono-2016-original,
title = "Original-Transcribed Text Alignment for {M}anyosyu Written by {O}ld {J}apanese Language",
author = "Oka, Teruaki and
Kono, Tomoaki",
editor = "Hinrichs, Erhard and
Hinrichs, Marie and
Trippel, Thorsten",
booktitle = "Proceedings of the Workshop on Language Technology Resources and Tools for Digital Humanities ({LT}4{DH})",
month = dec,
year = "2016",
address = "Osaka, Japan",
publisher = "The COLING 2016 Organizing Committee",
url = "https://aclanthology.org/W16-4006",
pages = "35--44",
abstract = "We are constructing an annotated diachronic corpora of the Japanese language. In part of this work, we construct a corpus of Manyosyu, which is an old Japanese poetry anthology. In this paper, we describe how to align the transcribed text and its original text semiautomatically to be able to cross-reference them in our Manyosyu corpus. Although we align the original characters to the transcribed words manually, we preliminarily align the transcribed and original characters by using an unsupervised automatic alignment technique of statistical machine translation to alleviate the work. We found that automatic alignment achieves an F1-measure of 0.83; thus, each poem has 1{--}2 alignment errors. However, finding these errors and modifying them are less work-intensive and more efficient than fully manual annotation. The alignment probabilities can be utilized in this modification. Moreover, we found that we can locate the uncertain transcriptions in our corpus and compare them to other transcriptions, by using the alignment probabilities.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="oka-kono-2016-original">
<titleInfo>
<title>Original-Transcribed Text Alignment for Manyosyu Written by Old Japanese Language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Teruaki</namePart>
<namePart type="family">Oka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tomoaki</namePart>
<namePart type="family">Kono</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Language Technology Resources and Tools for Digital Humanities (LT4DH)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Erhard</namePart>
<namePart type="family">Hinrichs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie</namePart>
<namePart type="family">Hinrichs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thorsten</namePart>
<namePart type="family">Trippel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The COLING 2016 Organizing Committee</publisher>
<place>
<placeTerm type="text">Osaka, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We are constructing an annotated diachronic corpora of the Japanese language. In part of this work, we construct a corpus of Manyosyu, which is an old Japanese poetry anthology. In this paper, we describe how to align the transcribed text and its original text semiautomatically to be able to cross-reference them in our Manyosyu corpus. Although we align the original characters to the transcribed words manually, we preliminarily align the transcribed and original characters by using an unsupervised automatic alignment technique of statistical machine translation to alleviate the work. We found that automatic alignment achieves an F1-measure of 0.83; thus, each poem has 1–2 alignment errors. However, finding these errors and modifying them are less work-intensive and more efficient than fully manual annotation. The alignment probabilities can be utilized in this modification. Moreover, we found that we can locate the uncertain transcriptions in our corpus and compare them to other transcriptions, by using the alignment probabilities.</abstract>
<identifier type="citekey">oka-kono-2016-original</identifier>
<location>
<url>https://aclanthology.org/W16-4006</url>
</location>
<part>
<date>2016-12</date>
<extent unit="page">
<start>35</start>
<end>44</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Original-Transcribed Text Alignment for Manyosyu Written by Old Japanese Language
%A Oka, Teruaki
%A Kono, Tomoaki
%Y Hinrichs, Erhard
%Y Hinrichs, Marie
%Y Trippel, Thorsten
%S Proceedings of the Workshop on Language Technology Resources and Tools for Digital Humanities (LT4DH)
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F oka-kono-2016-original
%X We are constructing an annotated diachronic corpora of the Japanese language. In part of this work, we construct a corpus of Manyosyu, which is an old Japanese poetry anthology. In this paper, we describe how to align the transcribed text and its original text semiautomatically to be able to cross-reference them in our Manyosyu corpus. Although we align the original characters to the transcribed words manually, we preliminarily align the transcribed and original characters by using an unsupervised automatic alignment technique of statistical machine translation to alleviate the work. We found that automatic alignment achieves an F1-measure of 0.83; thus, each poem has 1–2 alignment errors. However, finding these errors and modifying them are less work-intensive and more efficient than fully manual annotation. The alignment probabilities can be utilized in this modification. Moreover, we found that we can locate the uncertain transcriptions in our corpus and compare them to other transcriptions, by using the alignment probabilities.
%U https://aclanthology.org/W16-4006
%P 35-44
Markdown (Informal)
[Original-Transcribed Text Alignment for Manyosyu Written by Old Japanese Language](https://aclanthology.org/W16-4006) (Oka & Kono, LT4DH 2016)
ACL