@inproceedings{rodven-eide-2020-anforanden,
title = {{A}nf{\"o}randen: Annotated and Augmented Parliamentary Debates from {S}weden},
author = "R{\o}dven Eide, Stian",
editor = "Fi{\v{s}}er, Darja and
Eskevich, Maria and
de Jong, Franciska",
booktitle = "Proceedings of the Second ParlaCLARIN Workshop",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.parlaclarin-1.2",
pages = "5--10",
abstract = {The Swedish parliamentary debates have been available since 2010 through the parliament{'}s open data web site Riksdagens {\"o}ppna data. While fairly comprehensive, the structure of the data can be hard to understand and its content is somewhat noisy for use as a quality language resource. In order to make them easier to use and process {--} in particular for language technology research, but also for political science and other fields with an interest in parliamentary data {--} we have published a large selection of the debates in a cleaned and structured format, annotated with linguistic information and augmented with semantic links. Especially prevalent in the parliament{'}s data were end-line hyphenations {--} something that tokenisers generally are not equipped for {--} and a lot of the effort went into resolving these. In this paper, we provide detailed descriptions of the structure and contents of the resource, and explain how it differs from the parliament{'}s own version.},
language = "English",
ISBN = "979-10-95546-47-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rodven-eide-2020-anforanden">
<titleInfo>
<title>Anföranden: Annotated and Augmented Parliamentary Debates from Sweden</title>
</titleInfo>
<name type="personal">
<namePart type="given">Stian</namePart>
<namePart type="family">Rødven Eide</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second ParlaCLARIN Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Darja</namePart>
<namePart type="family">Fišer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="family">Eskevich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Franciska</namePart>
<namePart type="family">de Jong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-47-4</identifier>
</relatedItem>
<abstract>The Swedish parliamentary debates have been available since 2010 through the parliament’s open data web site Riksdagens öppna data. While fairly comprehensive, the structure of the data can be hard to understand and its content is somewhat noisy for use as a quality language resource. In order to make them easier to use and process – in particular for language technology research, but also for political science and other fields with an interest in parliamentary data – we have published a large selection of the debates in a cleaned and structured format, annotated with linguistic information and augmented with semantic links. Especially prevalent in the parliament’s data were end-line hyphenations – something that tokenisers generally are not equipped for – and a lot of the effort went into resolving these. In this paper, we provide detailed descriptions of the structure and contents of the resource, and explain how it differs from the parliament’s own version.</abstract>
<identifier type="citekey">rodven-eide-2020-anforanden</identifier>
<location>
<url>https://aclanthology.org/2020.parlaclarin-1.2</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>5</start>
<end>10</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Anföranden: Annotated and Augmented Parliamentary Debates from Sweden
%A Rødven Eide, Stian
%Y Fišer, Darja
%Y Eskevich, Maria
%Y de Jong, Franciska
%S Proceedings of the Second ParlaCLARIN Workshop
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-47-4
%G English
%F rodven-eide-2020-anforanden
%X The Swedish parliamentary debates have been available since 2010 through the parliament’s open data web site Riksdagens öppna data. While fairly comprehensive, the structure of the data can be hard to understand and its content is somewhat noisy for use as a quality language resource. In order to make them easier to use and process – in particular for language technology research, but also for political science and other fields with an interest in parliamentary data – we have published a large selection of the debates in a cleaned and structured format, annotated with linguistic information and augmented with semantic links. Especially prevalent in the parliament’s data were end-line hyphenations – something that tokenisers generally are not equipped for – and a lot of the effort went into resolving these. In this paper, we provide detailed descriptions of the structure and contents of the resource, and explain how it differs from the parliament’s own version.
%U https://aclanthology.org/2020.parlaclarin-1.2
%P 5-10
Markdown (Informal)
[Anföranden: Annotated and Augmented Parliamentary Debates from Sweden](https://aclanthology.org/2020.parlaclarin-1.2) (Rødven Eide, ParlaCLARIN 2020)
ACL