@inproceedings{blaette-etal-2022-germaparl,
title = "How {G}erma{P}arl Evolves: Improving Data Quality by Reproducible Corpus Preparation and User Involvement",
author = "Blaette, Andreas and
Rakers, Julia and
Leonhardt, Christoph",
editor = "Fi{\v{s}}er, Darja and
Eskevich, Maria and
Lenardi{\v{c}}, Jakob and
de Jong, Franciska",
booktitle = "Proceedings of the Workshop ParlaCLARIN III within the 13th Language Resources and Evaluation Conference",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.parlaclarin-1.2",
pages = "7--15",
abstract = "The development and curation of large-scale corpora of plenary debates requires not only care and attention to detail when the data is created but also effective means of sustainable quality control. This paper makes two contributions: Firstly, it presents an updated version of the GermaParl corpus of parliamentary debates in the German *Bundestag*. Secondly, it shows how the corpus preparation pipeline is designed to serve the quality of the resource by facilitating effective community involvement. Centered around a workflow which combines reproducibility, transparency and version control, the pipeline allows for continuous improvements to the corpus.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="blaette-etal-2022-germaparl">
<titleInfo>
<title>How GermaParl Evolves: Improving Data Quality by Reproducible Corpus Preparation and User Involvement</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Blaette</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Rakers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christoph</namePart>
<namePart type="family">Leonhardt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop ParlaCLARIN III within the 13th Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Darja</namePart>
<namePart type="family">Fišer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Eskevich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jakob</namePart>
<namePart type="family">Lenardič</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Franciska</namePart>
<namePart type="family">de Jong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The development and curation of large-scale corpora of plenary debates requires not only care and attention to detail when the data is created but also effective means of sustainable quality control. This paper makes two contributions: Firstly, it presents an updated version of the GermaParl corpus of parliamentary debates in the German *Bundestag*. Secondly, it shows how the corpus preparation pipeline is designed to serve the quality of the resource by facilitating effective community involvement. Centered around a workflow which combines reproducibility, transparency and version control, the pipeline allows for continuous improvements to the corpus.</abstract>
<identifier type="citekey">blaette-etal-2022-germaparl</identifier>
<location>
<url>https://aclanthology.org/2022.parlaclarin-1.2</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>7</start>
<end>15</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T How GermaParl Evolves: Improving Data Quality by Reproducible Corpus Preparation and User Involvement
%A Blaette, Andreas
%A Rakers, Julia
%A Leonhardt, Christoph
%Y Fišer, Darja
%Y Eskevich, Maria
%Y Lenardič, Jakob
%Y de Jong, Franciska
%S Proceedings of the Workshop ParlaCLARIN III within the 13th Language Resources and Evaluation Conference
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F blaette-etal-2022-germaparl
%X The development and curation of large-scale corpora of plenary debates requires not only care and attention to detail when the data is created but also effective means of sustainable quality control. This paper makes two contributions: Firstly, it presents an updated version of the GermaParl corpus of parliamentary debates in the German *Bundestag*. Secondly, it shows how the corpus preparation pipeline is designed to serve the quality of the resource by facilitating effective community involvement. Centered around a workflow which combines reproducibility, transparency and version control, the pipeline allows for continuous improvements to the corpus.
%U https://aclanthology.org/2022.parlaclarin-1.2
%P 7-15
Markdown (Informal)
[How GermaParl Evolves: Improving Data Quality by Reproducible Corpus Preparation and User Involvement](https://aclanthology.org/2022.parlaclarin-1.2) (Blaette et al., ParlaCLARIN 2022)
ACL