@inproceedings{oostdijk-etal-2008-coi,
    title = "From {D}-Coi to {S}o{N}a{R}: a reference corpus for {D}utch",
    author = "Oostdijk, Nelleke  and
      Reynaert, Martin  and
      Monachesi, Paola  and
      Van Noord, Gertjan  and
      Ordelman, Roeland  and
      Schuurman, Ineke  and
      Vandeghinste, Vincent",
    editor = "Calzolari, Nicoletta  and
      Choukri, Khalid  and
      Maegaard, Bente  and
      Mariani, Joseph  and
      Odijk, Jan  and
      Piperidis, Stelios  and
      Tapias, Daniel",
    booktitle = "Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)",
    month = may,
    year = "2008",
    address = "Marrakech, Morocco",
    publisher = "European Language Resources Association (ELRA)",
    url = "https://aclanthology.org/L08-1226/",
    abstract = "The computational linguistics community in The Netherlands and Belgium has long recognized the dire need for a major reference corpus of written Dutch. In part to answer this need, the STEVIN programme was established. To pave the way for the effective building of a 500-million-word reference corpus of written Dutch, a pilot project was established. The Dutch Corpus Initiative project or D-Coi was highly successful in that it not only realized about 10{\%} of the projected large reference corpus, but also established the best practices and developed all the protocols and the necessary tools for building the larger corpus within the confines of a necessarily limited budget. We outline the steps involved in an endeavour of this kind, including the major highlights and possible pitfalls. Once converted to a suitable XML format, further linguistic annotation based on the state-of-the-art tools developed either before or during the pilot by the consortium partners proved easily and fruitfully applicable. Linguistic enrichment of the corpus includes PoS tagging, syntactic parsing and semantic annotation, involving both semantic role labeling and spatiotemporal annotation. D-Coi is expected to be followed by SoNaR, during which the 500-million-word reference corpus of Dutch should be built."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="oostdijk-etal-2008-coi">
    <titleInfo>
        <title>From D-Coi to SoNaR: a reference corpus for Dutch</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Nelleke</namePart>
        <namePart type="family">Oostdijk</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Martin</namePart>
        <namePart type="family">Reynaert</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Paola</namePart>
        <namePart type="family">Monachesi</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Gertjan</namePart>
        <namePart type="family">Van Noord</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Roeland</namePart>
        <namePart type="family">Ordelman</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Ineke</namePart>
        <namePart type="family">Schuurman</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Vincent</namePart>
        <namePart type="family">Vandeghinste</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2008-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Nicoletta</namePart>
            <namePart type="family">Calzolari</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Khalid</namePart>
            <namePart type="family">Choukri</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Bente</namePart>
            <namePart type="family">Maegaard</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Joseph</namePart>
            <namePart type="family">Mariani</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Jan</namePart>
            <namePart type="family">Odijk</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Stelios</namePart>
            <namePart type="family">Piperidis</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Daniel</namePart>
            <namePart type="family">Tapias</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>European Language Resources Association (ELRA)</publisher>
            <place>
                <placeTerm type="text">Marrakech, Morocco</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The computational linguistics community in The Netherlands and Belgium has long recognized the dire need for a major reference corpus of written Dutch. In part to answer this need, the STEVIN programme was established. To pave the way for the effective building of a 500-million-word reference corpus of written Dutch, a pilot project was established. The Dutch Corpus Initiative project or D-Coi was highly successful in that it not only realized about 10% of the projected large reference corpus, but also established the best practices and developed all the protocols and the necessary tools for building the larger corpus within the confines of a necessarily limited budget. We outline the steps involved in an endeavour of this kind, including the major highlights and possible pitfalls. Once converted to a suitable XML format, further linguistic annotation based on the state-of-the-art tools developed either before or during the pilot by the consortium partners proved easily and fruitfully applicable. Linguistic enrichment of the corpus includes PoS tagging, syntactic parsing and semantic annotation, involving both semantic role labeling and spatiotemporal annotation. D-Coi is expected to be followed by SoNaR, during which the 500-million-word reference corpus of Dutch should be built.</abstract>
    <identifier type="citekey">oostdijk-etal-2008-coi</identifier>
    <location>
        <url>https://aclanthology.org/L08-1226/</url>
    </location>
    <part>
        <date>2008-05</date>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T From D-Coi to SoNaR: a reference corpus for Dutch
%A Oostdijk, Nelleke
%A Reynaert, Martin
%A Monachesi, Paola
%A Van Noord, Gertjan
%A Ordelman, Roeland
%A Schuurman, Ineke
%A Vandeghinste, Vincent
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Tapias, Daniel
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)
%D 2008
%8 May
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F oostdijk-etal-2008-coi
%X The computational linguistics community in The Netherlands and Belgium has long recognized the dire need for a major reference corpus of written Dutch. In part to answer this need, the STEVIN programme was established. To pave the way for the effective building of a 500-million-word reference corpus of written Dutch, a pilot project was established. The Dutch Corpus Initiative project or D-Coi was highly successful in that it not only realized about 10% of the projected large reference corpus, but also established the best practices and developed all the protocols and the necessary tools for building the larger corpus within the confines of a necessarily limited budget. We outline the steps involved in an endeavour of this kind, including the major highlights and possible pitfalls. Once converted to a suitable XML format, further linguistic annotation based on the state-of-the-art tools developed either before or during the pilot by the consortium partners proved easily and fruitfully applicable. Linguistic enrichment of the corpus includes PoS tagging, syntactic parsing and semantic annotation, involving both semantic role labeling and spatiotemporal annotation. D-Coi is expected to be followed by SoNaR, during which the 500-million-word reference corpus of Dutch should be built.
%U https://aclanthology.org/L08-1226/
Markdown (Informal)
[From D-Coi to SoNaR: a reference corpus for Dutch](https://aclanthology.org/L08-1226/) (Oostdijk et al., LREC 2008)
ACL
- Nelleke Oostdijk, Martin Reynaert, Paola Monachesi, Gertjan Van Noord, Roeland Ordelman, Ineke Schuurman, and Vincent Vandeghinste. 2008. From D-Coi to SoNaR: a reference corpus for Dutch. In Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08), Marrakech, Morocco. European Language Resources Association (ELRA).