% Conference paper (LREC 2012) on the automatic annotation and manual
% evaluation of the diachronic German corpus T\"uBa-D/DC.
% The u-umlaut is written as the BibTeX special character {\"u} throughout.
@inproceedings{hinrichs-zastrow-2012-automatic,
  title     = {Automatic Annotation and Manual Evaluation of the Diachronic {G}erman Corpus {T}{\"u}{B}a-{D}/{DC}},
  author    = {Hinrichs, Erhard and
               Zastrow, Thomas},
  editor    = {Calzolari, Nicoletta and
               Choukri, Khalid and
               Declerck, Thierry and
               Do{\u{g}}an, Mehmet U{\u{g}}ur and
               Maegaard, Bente and
               Mariani, Joseph and
               Moreno, Asuncion and
               Odijk, Jan and
               Piperidis, Stelios},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)},
  month     = may,
  year      = {2012},
  address   = {Istanbul, Turkey},
  publisher = {European Language Resources Association (ELRA)},
  url       = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/166_Paper.pdf},
  pages     = {1622--1627},
  abstract  = {This paper presents the T{\"u}bingen Baumbank des Deutschen Diachron (T{\"u}Ba-D/DC), a linguistically annotated corpus of selected diachronic materials from the German Gutenberg Project. It was automatically annotated by a suite of NLP tools integrated into WebLicht, the linguistic chaining tool used in CLARIN-D. The annotation quality has been evaluated manually for a subcorpus ranging from Middle High German to Modern High German. The integration of the T{\"u}Ba-D/DC into the CLARIN-D infrastructure includes metadata provision and harvesting as well as sustainable data storage in the T{\"u}bingen CLARIN-D center. The paper further provides an overview of the possibilities of accessing the T{\"u}Ba-D/DC data. Methods for full-text search of the metadata and object data and for annotation-based search of the object data are described in detail. The WebLicht Service Oriented Architecture is used as an integrated environment for annotation based search of the T{\"u}Ba-D/DC. WebLicht thus not only serves as the annotation platform for the T{\"u}Ba-D/DC, but also as a generic user interface for accessing and visualizing it.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hinrichs-zastrow-2012-automatic">
<titleInfo>
<title>Automatic Annotation and Manual Evaluation of the Diachronic German Corpus TüBa-D/DC</title>
</titleInfo>
<name type="personal">
<namePart type="given">Erhard</namePart>
<namePart type="family">Hinrichs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">Zastrow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mehmet</namePart>
<namePart type="given">Uğur</namePart>
<namePart type="family">Doğan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Istanbul, Turkey</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents the Tübingen Baumbank des Deutschen Diachron (TüBa-D/DC), a linguistically annotated corpus of selected diachronic materials from the German Gutenberg Project. It was automatically annotated by a suite of NLP tools integrated into WebLicht, the linguistic chaining tool used in CLARIN-D. The annotation quality has been evaluated manually for a subcorpus ranging from Middle High German to Modern High German. The integration of the TüBa-D/DC into the CLARIN-D infrastructure includes metadata provision and harvesting as well as sustainable data storage in the Tübingen CLARIN-D center. The paper further provides an overview of the possibilities of accessing the TüBa-D/DC data. Methods for full-text search of the metadata and object data and for annotation-based search of the object data are described in detail. The WebLicht Service Oriented Architecture is used as an integrated environment for annotation based search of the TüBa-D/DC. WebLicht thus not only serves as the annotation platform for the TüBa-D/DC, but also as a generic user interface for accessing and visualizing it.</abstract>
<identifier type="citekey">hinrichs-zastrow-2012-automatic</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/166_Paper.pdf</url>
</location>
<part>
<date>2012-05</date>
<extent unit="page">
<start>1622</start>
<end>1627</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Annotation and Manual Evaluation of the Diachronic German Corpus TüBa-D/DC
%A Hinrichs, Erhard
%A Zastrow, Thomas
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Doğan, Mehmet Uğur
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 May
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F hinrichs-zastrow-2012-automatic
%X This paper presents the Tübingen Baumbank des Deutschen Diachron (TüBa-D/DC), a linguistically annotated corpus of selected diachronic materials from the German Gutenberg Project. It was automatically annotated by a suite of NLP tools integrated into WebLicht, the linguistic chaining tool used in CLARIN-D. The annotation quality has been evaluated manually for a subcorpus ranging from Middle High German to Modern High German. The integration of the TüBa-D/DC into the CLARIN-D infrastructure includes metadata provision and harvesting as well as sustainable data storage in the Tübingen CLARIN-D center. The paper further provides an overview of the possibilities of accessing the TüBa-D/DC data. Methods for full-text search of the metadata and object data and for annotation-based search of the object data are described in detail. The WebLicht Service Oriented Architecture is used as an integrated environment for annotation based search of the TüBa-D/DC. WebLicht thus not only serves as the annotation platform for the TüBa-D/DC, but also as a generic user interface for accessing and visualizing it.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/166_Paper.pdf
%P 1622-1627
Markdown (Informal)
[Automatic Annotation and Manual Evaluation of the Diachronic German Corpus TüBa-D/DC](http://www.lrec-conf.org/proceedings/lrec2012/pdf/166_Paper.pdf) (Hinrichs & Zastrow, LREC 2012)
ACL