@inproceedings{chaplynskyi-romanyshyn-2024-introducing,
title = "Introducing {NER}-{UK} 2.0: A Rich Corpus of Named Entities for {U}krainian",
author = "Chaplynskyi, Dmytro and
Romanyshyn, Mariana",
editor = "Romanyshyn, Mariana and
Romanyshyn, Nataliia and
Hlybovets, Andrii and
Ignatenko, Oleksii",
booktitle = "Proceedings of the Third Ukrainian Natural Language Processing Workshop (UNLP) @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.unlp-1.4",
pages = "23--29",
abstract = "This paper presents NER-UK 2.0, a corpus of texts in the Ukrainian language manually annotated for the named entity recognition task. The corpus contains 560 texts of multiple genres, boasting 21,993 entities in total. The annotation scheme covers 13 entity types, namely location, person name, organization, artifact, document, job title, date, time, period, money, percentage, quantity, and miscellaneous. Such a rich set of entities makes the corpus valuable for training named-entity recognition models in various domains, including news, social media posts, legal documents, and procurement contracts. The paper presents an updated baseline solution for named entity recognition in Ukrainian with 0.89 F1. The corpus is the largest of its kind for the Ukrainian language and is available for download.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chaplynskyi-romanyshyn-2024-introducing">
<titleInfo>
<title>Introducing NER-UK 2.0: A Rich Corpus of Named Entities for Ukrainian</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dmytro</namePart>
<namePart type="family">Chaplynskyi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mariana</namePart>
<namePart type="family">Romanyshyn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Ukrainian Natural Language Processing Workshop (UNLP) @ LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mariana</namePart>
<namePart type="family">Romanyshyn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nataliia</namePart>
<namePart type="family">Romanyshyn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrii</namePart>
<namePart type="family">Hlybovets</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oleksii</namePart>
<namePart type="family">Ignatenko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents NER-UK 2.0, a corpus of texts in the Ukrainian language manually annotated for the named entity recognition task. The corpus contains 560 texts of multiple genres, boasting 21,993 entities in total. The annotation scheme covers 13 entity types, namely location, person name, organization, artifact, document, job title, date, time, period, money, percentage, quantity, and miscellaneous. Such a rich set of entities makes the corpus valuable for training named-entity recognition models in various domains, including news, social media posts, legal documents, and procurement contracts. The paper presents an updated baseline solution for named entity recognition in Ukrainian with 0.89 F1. The corpus is the largest of its kind for the Ukrainian language and is available for download.</abstract>
<identifier type="citekey">chaplynskyi-romanyshyn-2024-introducing</identifier>
<location>
<url>https://aclanthology.org/2024.unlp-1.4</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>23</start>
<end>29</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Introducing NER-UK 2.0: A Rich Corpus of Named Entities for Ukrainian
%A Chaplynskyi, Dmytro
%A Romanyshyn, Mariana
%Y Romanyshyn, Mariana
%Y Romanyshyn, Nataliia
%Y Hlybovets, Andrii
%Y Ignatenko, Oleksii
%S Proceedings of the Third Ukrainian Natural Language Processing Workshop (UNLP) @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F chaplynskyi-romanyshyn-2024-introducing
%X This paper presents NER-UK 2.0, a corpus of texts in the Ukrainian language manually annotated for the named entity recognition task. The corpus contains 560 texts of multiple genres, boasting 21,993 entities in total. The annotation scheme covers 13 entity types, namely location, person name, organization, artifact, document, job title, date, time, period, money, percentage, quantity, and miscellaneous. Such a rich set of entities makes the corpus valuable for training named-entity recognition models in various domains, including news, social media posts, legal documents, and procurement contracts. The paper presents an updated baseline solution for named entity recognition in Ukrainian with 0.89 F1. The corpus is the largest of its kind for the Ukrainian language and is available for download.
%U https://aclanthology.org/2024.unlp-1.4
%P 23-29
Markdown (Informal)
[Introducing NER-UK 2.0: A Rich Corpus of Named Entities for Ukrainian](https://aclanthology.org/2024.unlp-1.4) (Chaplynskyi & Romanyshyn, UNLP 2024)
ACL