@inproceedings{laarmann-quante-etal-2019-making,
title = "The making of the Litkey Corpus, a richly annotated longitudinal corpus of {G}erman texts written by primary school children",
author = "Laarmann-Quante, Ronja and
Dipper, Stefanie and
Belke, Eva",
editor = "Friedrich, Annemarie and
Zeyrek, Deniz and
Hoek, Jet",
booktitle = "Proceedings of the 13th Linguistic Annotation Workshop",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-4006",
doi = "10.18653/v1/W19-4006",
pages = "43--55",
abstract = "To date, corpus and computational linguistic work on written language acquisition has mostly dealt with second language learners who have usually already mastered orthography acquisition in their first language. In this paper, we present the Litkey Corpus, a richly-annotated longitudinal corpus of written texts produced by primary school children in Germany from grades 2 to 4. The paper focuses on the (semi-)automatic annotation procedure at various linguistic levels, which include POS tags, features of the word-internal structure (phonemes, syllables, morphemes) and key orthographic features of the target words as well as a categorization of spelling errors. Comprehensive evaluations show that high accuracy was achieved on all levels, making the Litkey Corpus a useful resource for corpus-based research on literacy acquisition of German primary school children and for developing NLP tools for educational purposes. The corpus is freely available under \url{https://www.linguistics.rub.de/litkeycorpus/}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="laarmann-quante-etal-2019-making">
<titleInfo>
<title>The making of the Litkey Corpus, a richly annotated longitudinal corpus of German texts written by primary school children</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ronja</namePart>
<namePart type="family">Laarmann-Quante</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefanie</namePart>
<namePart type="family">Dipper</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eva</namePart>
<namePart type="family">Belke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th Linguistic Annotation Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Annemarie</namePart>
<namePart type="family">Friedrich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deniz</namePart>
<namePart type="family">Zeyrek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jet</namePart>
<namePart type="family">Hoek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>To date, corpus and computational linguistic work on written language acquisition has mostly dealt with second language learners who have usually already mastered orthography acquisition in their first language. In this paper, we present the Litkey Corpus, a richly-annotated longitudinal corpus of written texts produced by primary school children in Germany from grades 2 to 4. The paper focuses on the (semi-)automatic annotation procedure at various linguistic levels, which include POS tags, features of the word-internal structure (phonemes, syllables, morphemes) and key orthographic features of the target words as well as a categorization of spelling errors. Comprehensive evaluations show that high accuracy was achieved on all levels, making the Litkey Corpus a useful resource for corpus-based research on literacy acquisition of German primary school children and for developing NLP tools for educational purposes. The corpus is freely available under https://www.linguistics.rub.de/litkeycorpus/.</abstract>
<identifier type="citekey">laarmann-quante-etal-2019-making</identifier>
<identifier type="doi">10.18653/v1/W19-4006</identifier>
<location>
<url>https://aclanthology.org/W19-4006</url>
</location>
<part>
<date>2019-08</date>
<extent unit="page">
<start>43</start>
<end>55</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The making of the Litkey Corpus, a richly annotated longitudinal corpus of German texts written by primary school children
%A Laarmann-Quante, Ronja
%A Dipper, Stefanie
%A Belke, Eva
%Y Friedrich, Annemarie
%Y Zeyrek, Deniz
%Y Hoek, Jet
%S Proceedings of the 13th Linguistic Annotation Workshop
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F laarmann-quante-etal-2019-making
%X To date, corpus and computational linguistic work on written language acquisition has mostly dealt with second language learners who have usually already mastered orthography acquisition in their first language. In this paper, we present the Litkey Corpus, a richly-annotated longitudinal corpus of written texts produced by primary school children in Germany from grades 2 to 4. The paper focuses on the (semi-)automatic annotation procedure at various linguistic levels, which include POS tags, features of the word-internal structure (phonemes, syllables, morphemes) and key orthographic features of the target words as well as a categorization of spelling errors. Comprehensive evaluations show that high accuracy was achieved on all levels, making the Litkey Corpus a useful resource for corpus-based research on literacy acquisition of German primary school children and for developing NLP tools for educational purposes. The corpus is freely available under https://www.linguistics.rub.de/litkeycorpus/.
%R 10.18653/v1/W19-4006
%U https://aclanthology.org/W19-4006
%U https://doi.org/10.18653/v1/W19-4006
%P 43-55
Markdown (Informal)
[The making of the Litkey Corpus, a richly annotated longitudinal corpus of German texts written by primary school children](https://aclanthology.org/W19-4006) (Laarmann-Quante et al., LAW 2019)
ACL