@inproceedings{koiso-etal-2014-design,
    title = "Design and development of an {RDB} version of the Corpus of Spontaneous {J}apanese",
    author = "Koiso, Hanae  and
      Den, Yasuharu  and
      Nishikawa, Ken{'}ya  and
      Maekawa, Kikuo",
    editor = "Calzolari, Nicoletta  and
      Choukri, Khalid  and
      Declerck, Thierry  and
      Loftsson, Hrafn  and
      Maegaard, Bente  and
      Mariani, Joseph  and
      Moreno, Asuncion  and
      Odijk, Jan  and
      Piperidis, Stelios",
    booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
    month = may,
    year = "2014",
    address = "Reykjavik, Iceland",
    publisher = "European Language Resources Association (ELRA)",
    url = "https://aclanthology.org/L14-1371/",
    pages = "1471--1476",
    abstract = "In this paper, we describe the design and development of a new version of the Corpus of Spontaneous Japanese (CSJ), which is a large-scale spoken corpus released in 2004. CSJ contains various annotations that are represented in XML format (CSJ-XML). CSJ-XML, however, is very complicated and suffers from some problems. To overcome this problem, we have developed and released, in 2013, a relational database version of CSJ (CSJ-RDB). CSJ-RDB is based on an extension of the segment and link-based annotation scheme, which we adapted to handle multi-channel and multi-modal streams. Because this scheme adopts a stand-off framework, CSJ-RDB can represent three hierarchical structures at the same time: inter-pausal-unit-top, clause-top, and intonational-phrase-top. CSJ-RDB consists of five different types of tables: segment, unaligned-segment, link, relation, and meta-information tables. The database was automatically constructed from annotation files extracted from CSJ-XML by using general-purpose corpus construction tools. CSJ-RDB enables us to easily and efficiently conduct complex searches required for corpus-based studies of spoken language."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="koiso-etal-2014-design">
    <titleInfo>
        <title>Design and development of an RDB version of the Corpus of Spontaneous Japanese</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Hanae</namePart>
        <namePart type="family">Koiso</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Yasuharu</namePart>
        <namePart type="family">Den</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Ken’ya</namePart>
        <namePart type="family">Nishikawa</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Kikuo</namePart>
        <namePart type="family">Maekawa</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2014-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Nicoletta</namePart>
            <namePart type="family">Calzolari</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Khalid</namePart>
            <namePart type="family">Choukri</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Thierry</namePart>
            <namePart type="family">Declerck</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Hrafn</namePart>
            <namePart type="family">Loftsson</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Bente</namePart>
            <namePart type="family">Maegaard</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Joseph</namePart>
            <namePart type="family">Mariani</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Asuncion</namePart>
            <namePart type="family">Moreno</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Jan</namePart>
            <namePart type="family">Odijk</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Stelios</namePart>
            <namePart type="family">Piperidis</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>European Language Resources Association (ELRA)</publisher>
            <place>
                <placeTerm type="text">Reykjavik, Iceland</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper, we describe the design and development of a new version of the Corpus of Spontaneous Japanese (CSJ), which is a large-scale spoken corpus released in 2004. CSJ contains various annotations that are represented in XML format (CSJ-XML). CSJ-XML, however, is very complicated and suffers from some problems. To overcome this problem, we have developed and released, in 2013, a relational database version of CSJ (CSJ-RDB). CSJ-RDB is based on an extension of the segment and link-based annotation scheme, which we adapted to handle multi-channel and multi-modal streams. Because this scheme adopts a stand-off framework, CSJ-RDB can represent three hierarchical structures at the same time: inter-pausal-unit-top, clause-top, and intonational-phrase-top. CSJ-RDB consists of five different types of tables: segment, unaligned-segment, link, relation, and meta-information tables. The database was automatically constructed from annotation files extracted from CSJ-XML by using general-purpose corpus construction tools. CSJ-RDB enables us to easily and efficiently conduct complex searches required for corpus-based studies of spoken language.</abstract>
    <identifier type="citekey">koiso-etal-2014-design</identifier>
    <location>
        <url>https://aclanthology.org/L14-1371/</url>
    </location>
    <part>
        <date>2014-05</date>
        <extent unit="page">
            <start>1471</start>
            <end>1476</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Design and development of an RDB version of the Corpus of Spontaneous Japanese
%A Koiso, Hanae
%A Den, Yasuharu
%A Nishikawa, Ken’ya
%A Maekawa, Kikuo
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Loftsson, Hrafn
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 May
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F koiso-etal-2014-design
%X In this paper, we describe the design and development of a new version of the Corpus of Spontaneous Japanese (CSJ), which is a large-scale spoken corpus released in 2004. CSJ contains various annotations that are represented in XML format (CSJ-XML). CSJ-XML, however, is very complicated and suffers from some problems. To overcome this problem, we have developed and released, in 2013, a relational database version of CSJ (CSJ-RDB). CSJ-RDB is based on an extension of the segment and link-based annotation scheme, which we adapted to handle multi-channel and multi-modal streams. Because this scheme adopts a stand-off framework, CSJ-RDB can represent three hierarchical structures at the same time: inter-pausal-unit-top, clause-top, and intonational-phrase-top. CSJ-RDB consists of five different types of tables: segment, unaligned-segment, link, relation, and meta-information tables. The database was automatically constructed from annotation files extracted from CSJ-XML by using general-purpose corpus construction tools. CSJ-RDB enables us to easily and efficiently conduct complex searches required for corpus-based studies of spoken language.
%U https://aclanthology.org/L14-1371/
%P 1471-1476
Markdown (Informal)
[Design and development of an RDB version of the Corpus of Spontaneous Japanese](https://aclanthology.org/L14-1371/) (Koiso et al., LREC 2014)
ACL