@inproceedings{schneider-2014-genitivdb,
title = "{G}enitiv{DB} {---} a Corpus-Generated Database for {G}erman Genitive Classification",
author = "Schneider, Roman",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Loftsson, Hrafn and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
month = may,
year = "2014",
address = "Reykjavik, Iceland",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/346_Paper.pdf",
pages = "988--994",
abstract = "We present a novel NLP resource for the explanation of linguistic phenomena, built and evaluated exploring very large annotated language corpora. For the compilation, we use the German Reference Corpus (DeReKo) with more than 5 billion word forms, which is the largest linguistic resource worldwide for the study of contemporary written German. The result is a comprehensive database of German genitive formations, enriched with a broad range of intra- and extralinguistic metadata. It can be used for the notoriously controversial classification and prediction of genitive endings (short endings, long endings, zero-marker). We also evaluate the main factors influencing the use of specific endings. To get a general idea about a factor's influences and its side effects, we calculate chi-square-tests and visualize the residuals with an association plot. The results are evaluated against a gold standard by implementing tree-based machine learning algorithms. For the statistical analysis, we applied the supervised LMT Logistic Model Trees algorithm, using the WEKA software. We intend to use this gold standard to evaluate GenitivDB, as well as to explore methodologies for a predictive genitive model.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="schneider-2014-genitivdb">
<titleInfo>
<title>GenitivDB — a Corpus-Generated Database for German Genitive Classification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Roman</namePart>
<namePart type="family">Schneider</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a novel NLP resource for the explanation of linguistic phenomena, built and evaluated exploring very large annotated language corpora. For the compilation, we use the German Reference Corpus (DeReKo) with more than 5 billion word forms, which is the largest linguistic resource worldwide for the study of contemporary written German. The result is a comprehensive database of German genitive formations, enriched with a broad range of intra- and extralinguistic metadata. It can be used for the notoriously controversial classification and prediction of genitive endings (short endings, long endings, zero-marker). We also evaluate the main factors influencing the use of specific endings. To get a general idea about a factor's influences and its side effects, we calculate chi-square-tests and visualize the residuals with an association plot. The results are evaluated against a gold standard by implementing tree-based machine learning algorithms. For the statistical analysis, we applied the supervised LMT Logistic Model Trees algorithm, using the WEKA software. We intend to use this gold standard to evaluate GenitivDB, as well as to explore methodologies for a predictive genitive model.</abstract>
<identifier type="citekey">schneider-2014-genitivdb</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/346_Paper.pdf</url>
</location>
<part>
<date>2014-05</date>
<extent unit="page">
<start>988</start>
<end>994</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GenitivDB — a Corpus-Generated Database for German Genitive Classification
%A Schneider, Roman
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Loftsson, Hrafn
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 May
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F schneider-2014-genitivdb
%X We present a novel NLP resource for the explanation of linguistic phenomena, built and evaluated exploring very large annotated language corpora. For the compilation, we use the German Reference Corpus (DeReKo) with more than 5 billion word forms, which is the largest linguistic resource worldwide for the study of contemporary written German. The result is a comprehensive database of German genitive formations, enriched with a broad range of intra- and extralinguistic metadata. It can be used for the notoriously controversial classification and prediction of genitive endings (short endings, long endings, zero-marker). We also evaluate the main factors influencing the use of specific endings. To get a general idea about a factor's influences and its side effects, we calculate chi-square-tests and visualize the residuals with an association plot. The results are evaluated against a gold standard by implementing tree-based machine learning algorithms. For the statistical analysis, we applied the supervised LMT Logistic Model Trees algorithm, using the WEKA software. We intend to use this gold standard to evaluate GenitivDB, as well as to explore methodologies for a predictive genitive model.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/346_Paper.pdf
%P 988-994
Markdown (Informal)
[GenitivDB — a Corpus-Generated Database for German Genitive Classification](http://www.lrec-conf.org/proceedings/lrec2014/pdf/346_Paper.pdf) (Schneider, LREC 2014)
ACL