@inproceedings{omura-asahara-2018-ud,
title = "{UD}-{J}apanese {BCCWJ}: {U}niversal {D}ependencies Annotation for the {B}alanced {C}orpus of {C}ontemporary {W}ritten {J}apanese",
author = "Omura, Mai and
Asahara, Masayuki",
editor = "de Marneffe, Marie-Catherine and
Lynn, Teresa and
Schuster, Sebastian",
booktitle = "Proceedings of the Second Workshop on Universal Dependencies ({UDW} 2018)",
month = nov,
year = "2018",
address = "Brussels, Belgium",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-6014",
doi = "10.18653/v1/W18-6014",
pages = "117--125",
abstract = "In this paper, we describe a corpus UD Japanese-BCCWJ that was created by converting the Balanced Corpus of Contemporary Written Japanese (BCCWJ), a Japanese language corpus, to adhere to the UD annotation schema. The BCCWJ already assigns dependency information at the level of the bunsetsu (a Japanese syntactic unit comparable to the phrase). We developed a program to convert the BCCWJ to UD based on this dependency structure, and this corpus is the result of completely automatic conversion using the program. UD Japanese-BCCWJ is the largest-scale UD Japanese corpus and the second-largest of all UD corpora, including 1,980 documents, 57,109 sentences, and 1,273k words across six distinct domains.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="omura-asahara-2018-ud">
<titleInfo>
<title>UD-Japanese BCCWJ: Universal Dependencies Annotation for the Balanced Corpus of Contemporary Written Japanese</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mai</namePart>
<namePart type="family">Omura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Masayuki</namePart>
<namePart type="family">Asahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Workshop on Universal Dependencies (UDW 2018)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marie-Catherine</namePart>
<namePart type="family">de Marneffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Teresa</namePart>
<namePart type="family">Lynn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sebastian</namePart>
<namePart type="family">Schuster</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Brussels, Belgium</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we describe a corpus UD Japanese-BCCWJ that was created by converting the Balanced Corpus of Contemporary Written Japanese (BCCWJ), a Japanese language corpus, to adhere to the UD annotation schema. The BCCWJ already assigns dependency information at the level of the bunsetsu (a Japanese syntactic unit comparable to the phrase). We developed a program to convert the BCCWJ to UD based on this dependency structure, and this corpus is the result of completely automatic conversion using the program. UD Japanese-BCCWJ is the largest-scale UD Japanese corpus and the second-largest of all UD corpora, including 1,980 documents, 57,109 sentences, and 1,273k words across six distinct domains.</abstract>
<identifier type="citekey">omura-asahara-2018-ud</identifier>
<identifier type="doi">10.18653/v1/W18-6014</identifier>
<location>
<url>https://aclanthology.org/W18-6014</url>
</location>
<part>
<date>2018-11</date>
<extent unit="page">
<start>117</start>
<end>125</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T UD-Japanese BCCWJ: Universal Dependencies Annotation for the Balanced Corpus of Contemporary Written Japanese
%A Omura, Mai
%A Asahara, Masayuki
%Y de Marneffe, Marie-Catherine
%Y Lynn, Teresa
%Y Schuster, Sebastian
%S Proceedings of the Second Workshop on Universal Dependencies (UDW 2018)
%D 2018
%8 November
%I Association for Computational Linguistics
%C Brussels, Belgium
%F omura-asahara-2018-ud
%X In this paper, we describe a corpus UD Japanese-BCCWJ that was created by converting the Balanced Corpus of Contemporary Written Japanese (BCCWJ), a Japanese language corpus, to adhere to the UD annotation schema. The BCCWJ already assigns dependency information at the level of the bunsetsu (a Japanese syntactic unit comparable to the phrase). We developed a program to convert the BCCWJ to UD based on this dependency structure, and this corpus is the result of completely automatic conversion using the program. UD Japanese-BCCWJ is the largest-scale UD Japanese corpus and the second-largest of all UD corpora, including 1,980 documents, 57,109 sentences, and 1,273k words across six distinct domains.
%R 10.18653/v1/W18-6014
%U https://aclanthology.org/W18-6014
%U https://doi.org/10.18653/v1/W18-6014
%P 117-125
Markdown (Informal)
[UD-Japanese BCCWJ: Universal Dependencies Annotation for the Balanced Corpus of Contemporary Written Japanese](https://aclanthology.org/W18-6014) (Omura & Asahara, UDW 2018)
ACL