@inproceedings{erjavec-krek-2008-jos,
title = "The {JOS} Morphosyntactically Tagged Corpus of {S}lovene",
author = "Erjavec, Toma{\v{z}} and
Krek, Simon",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Tapias, Daniel",
booktitle = "Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}`08)",
month = may,
year = "2008",
address = "Marrakech, Morocco",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L08-1451/",
abstract = "The JOSmorphosyntactic resources for Slovene consist of the specifications, lexicon, and two corpora: jos100k, a 100,000 word balanced monolingual sampled corpus annotated with hand validated morphosyntactic descriptions (MSDs) and lemmas, and jos1M, the 1 million-word partially hand validated corpus. The two corpora have been sampled from the 600M-word Slovene reference corpus FidaPLUS. The JOS resources have a standardised encoding, with the MULTEXT-East-type morphosyntactic specifications and the corpora encoded according to the Text Encoding Initiative Guidelines P5. JOS resources are available as a dataset for research under the Creative Commons licence and are meant to facilitate developments of HLT for Slovene."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="erjavec-krek-2008-jos">
<titleInfo>
<title>The JOS Morphosyntactically Tagged Corpus of Slovene</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tomaž</namePart>
<namePart type="family">Erjavec</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Krek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2008-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC‘08)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marrakech, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The JOSmorphosyntactic resources for Slovene consist of the specifications, lexicon, and two corpora: jos100k, a 100,000 word balanced monolingual sampled corpus annotated with hand validated morphosyntactic descriptions (MSDs) and lemmas, and jos1M, the 1 million-word partially hand validated corpus. The two corpora have been sampled from the 600M-word Slovene reference corpus FidaPLUS. The JOS resources have a standardised encoding, with the MULTEXT-East-type morphosyntactic specifications and the corpora encoded according to the Text Encoding Initiative Guidelines P5. JOS resources are available as a dataset for research under the Creative Commons licence and are meant to facilitate developments of HLT for Slovene.</abstract>
<identifier type="citekey">erjavec-krek-2008-jos</identifier>
<location>
<url>https://aclanthology.org/L08-1451/</url>
</location>
<part>
<date>2008-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The JOS Morphosyntactically Tagged Corpus of Slovene
%A Erjavec, Tomaž
%A Krek, Simon
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Tapias, Daniel
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC‘08)
%D 2008
%8 May
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F erjavec-krek-2008-jos
%X The JOSmorphosyntactic resources for Slovene consist of the specifications, lexicon, and two corpora: jos100k, a 100,000 word balanced monolingual sampled corpus annotated with hand validated morphosyntactic descriptions (MSDs) and lemmas, and jos1M, the 1 million-word partially hand validated corpus. The two corpora have been sampled from the 600M-word Slovene reference corpus FidaPLUS. The JOS resources have a standardised encoding, with the MULTEXT-East-type morphosyntactic specifications and the corpora encoded according to the Text Encoding Initiative Guidelines P5. JOS resources are available as a dataset for research under the Creative Commons licence and are meant to facilitate developments of HLT for Slovene.
%U https://aclanthology.org/L08-1451/
Markdown (Informal)
[The JOS Morphosyntactically Tagged Corpus of Slovene](https://aclanthology.org/L08-1451/) (Erjavec & Krek, LREC 2008)
ACL