@inproceedings{krek-etal-2020-gigafida,
title = "Gigafida 2.0: The Reference Corpus of Written Standard {S}lovene",
author = "Krek, Simon and
Arhar Holdt, {\v{S}}pela and
Erjavec, Toma{\v{z}} and
{\v{C}}ibej, Jaka and
Repar, Andraz and
Gantar, Polona and
Ljube{\v{s}}i{\'c}, Nikola and
Kosem, Iztok and
Dobrovoljc, Kaja",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.409",
pages = "3340--3345",
abstract = "We describe a new version of the Gigafida reference corpus of Slovene. In addition to updating the corpus with new material and annotating it with better tools, the focus of the upgrade was also on its transformation from a general reference corpus, which contains all language variants including non-standard language, to the corpus of standard (written) Slovene. This decision could be implemented as new corpora dedicated specifically to non-standard language emerged recently. In the new version, the whole Gigafida corpus was deduplicated for the first time, which facilitates automatic extraction of data for the purposes of compilation of new lexicographic resources such as the collocations dictionary and the thesaurus of Slovene.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="krek-etal-2020-gigafida">
<titleInfo>
<title>Gigafida 2.0: The Reference Corpus of Written Standard Slovene</title>
</titleInfo>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Krek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Špela</namePart>
<namePart type="family">Arhar Holdt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tomaž</namePart>
<namePart type="family">Erjavec</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jaka</namePart>
<namePart type="family">Čibej</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andraz</namePart>
<namePart type="family">Repar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Polona</namePart>
<namePart type="family">Gantar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="family">Ljubešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iztok</namePart>
<namePart type="family">Kosem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kaja</namePart>
<namePart type="family">Dobrovoljc</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>We describe a new version of the Gigafida reference corpus of Slovene. In addition to updating the corpus with new material and annotating it with better tools, the focus of the upgrade was also on its transformation from a general reference corpus, which contains all language variants including non-standard language, to the corpus of standard (written) Slovene. This decision could be implemented as new corpora dedicated specifically to non-standard language emerged recently. In the new version, the whole Gigafida corpus was deduplicated for the first time, which facilitates automatic extraction of data for the purposes of compilation of new lexicographic resources such as the collocations dictionary and the thesaurus of Slovene.</abstract>
<identifier type="citekey">krek-etal-2020-gigafida</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.409</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>3340</start>
<end>3345</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Gigafida 2.0: The Reference Corpus of Written Standard Slovene
%A Krek, Simon
%A Arhar Holdt, Špela
%A Erjavec, Tomaž
%A Čibej, Jaka
%A Repar, Andraz
%A Gantar, Polona
%A Ljubešić, Nikola
%A Kosem, Iztok
%A Dobrovoljc, Kaja
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F krek-etal-2020-gigafida
%X We describe a new version of the Gigafida reference corpus of Slovene. In addition to updating the corpus with new material and annotating it with better tools, the focus of the upgrade was also on its transformation from a general reference corpus, which contains all language variants including non-standard language, to the corpus of standard (written) Slovene. This decision could be implemented as new corpora dedicated specifically to non-standard language emerged recently. In the new version, the whole Gigafida corpus was deduplicated for the first time, which facilitates automatic extraction of data for the purposes of compilation of new lexicographic resources such as the collocations dictionary and the thesaurus of Slovene.
%U https://aclanthology.org/2020.lrec-1.409
%P 3340-3345
Markdown (Informal)
[Gigafida 2.0: The Reference Corpus of Written Standard Slovene](https://aclanthology.org/2020.lrec-1.409) (Krek et al., LREC 2020)
ACL
- Simon Krek, Špela Arhar Holdt, Tomaž Erjavec, Jaka Čibej, Andraz Repar, Polona Gantar, Nikola Ljubešić, Iztok Kosem, and Kaja Dobrovoljc. 2020. Gigafida 2.0: The Reference Corpus of Written Standard Slovene. In Proceedings of the Twelfth Language Resources and Evaluation Conference, pages 3340–3345, Marseille, France. European Language Resources Association.