@inproceedings{pancur-erjavec-2020-siparl,
title = "The si{P}arl corpus of {S}lovene parliamentary proceedings",
author = "Pancur, Andrej and
Erjavec, Toma{\v{z}}",
booktitle = "Proceedings of the Second ParlaCLARIN Workshop",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.parlaclarin-1.6",
pages = "28--34",
abstract = "The paper describes the process of acquisition, up-translation, encoding, annotation, and distribution of siParl, a collection of the parliamentary debates from the Assembly of the Republic of Slovenia from 1990{--}2018, covering the period from just before Slovenia became an independent country in 1991, and almost up to the present. The entire corpus, comprising over 8 thousand sessions, 1 million speeches and 200 million words was uniformly encoded in accordance with the TEI-based Parla-CLARIN schema for encoding corpora of parliamentary debates, and contains extensive meta-data about the speakers, a typology of sessions etc. and structural and editorial annotations. The corpus was also part-of-speech tagged and lemmatised using state-of-the-art tools. The corpus is maintained on GitHub with its major versions archived in the CLARIN.SI repository and is available for linguistic analysis in the scope of the on-line CLARIN.SI concordancers, thus offering an invaluable resource for scholars studying Slovenian political history.",
language = "English",
ISBN = "979-10-95546-47-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pancur-erjavec-2020-siparl">
<titleInfo>
<title>The siParl corpus of Slovene parliamentary proceedings</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andrej</namePart>
<namePart type="family">Pancur</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tomaž</namePart>
<namePart type="family">Erjavec</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second ParlaCLARIN Workshop</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-47-4</identifier>
</relatedItem>
<abstract>The paper describes the process of acquisition, up-translation, encoding, annotation, and distribution of siParl, a collection of the parliamentary debates from the Assembly of the Republic of Slovenia from 1990–2018, covering the period from just before Slovenia became an independent country in 1991, and almost up to the present. The entire corpus, comprising over 8 thousand sessions, 1 million speeches and 200 million words was uniformly encoded in accordance with the TEI-based Parla-CLARIN schema for encoding corpora of parliamentary debates, and contains extensive meta-data about the speakers, a typology of sessions etc. and structural and editorial annotations. The corpus was also part-of-speech tagged and lemmatised using state-of-the-art tools. The corpus is maintained on GitHub with its major versions archived in the CLARIN.SI repository and is available for linguistic analysis in the scope of the on-line CLARIN.SI concordancers, thus offering an invaluable resource for scholars studying Slovenian political history.</abstract>
<identifier type="citekey">pancur-erjavec-2020-siparl</identifier>
<location>
<url>https://aclanthology.org/2020.parlaclarin-1.6</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>28</start>
<end>34</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The siParl corpus of Slovene parliamentary proceedings
%A Pancur, Andrej
%A Erjavec, Tomaž
%S Proceedings of the Second ParlaCLARIN Workshop
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-47-4
%G English
%F pancur-erjavec-2020-siparl
%X The paper describes the process of acquisition, up-translation, encoding, annotation, and distribution of siParl, a collection of the parliamentary debates from the Assembly of the Republic of Slovenia from 1990–2018, covering the period from just before Slovenia became an independent country in 1991, and almost up to the present. The entire corpus, comprising over 8 thousand sessions, 1 million speeches and 200 million words was uniformly encoded in accordance with the TEI-based Parla-CLARIN schema for encoding corpora of parliamentary debates, and contains extensive meta-data about the speakers, a typology of sessions etc. and structural and editorial annotations. The corpus was also part-of-speech tagged and lemmatised using state-of-the-art tools. The corpus is maintained on GitHub with its major versions archived in the CLARIN.SI repository and is available for linguistic analysis in the scope of the on-line CLARIN.SI concordancers, thus offering an invaluable resource for scholars studying Slovenian political history.
%U https://aclanthology.org/2020.parlaclarin-1.6
%P 28-34
Markdown (Informal)
[The siParl corpus of Slovene parliamentary proceedings](https://aclanthology.org/2020.parlaclarin-1.6) (Pancur & Erjavec, ParlaCLARIN 2020)
ACL