@inproceedings{sirts-2023-estonian,
title = "{E}stonian Named Entity Recognition: New Datasets and Models",
author = "Sirts, Kairit",
editor = {Alum{\"a}e, Tanel and
Fishel, Mark},
booktitle = "Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)",
month = may,
year = "2023",
address = "T{\'o}rshavn, Faroe Islands",
publisher = "University of Tartu Library",
url = "https://aclanthology.org/2023.nodalida-1.76",
pages = "752--761",
abstract = "This paper presents the annotation process of two Estonian named entity recognition (NER) datasets, involving the creation of annotation guidelines for labeling eleven different types of entities. In addition to the commonly annotated entities such as person names, organization names, and locations, the annotation scheme encompasses geopolitical entities, product names, titles/roles, events, dates, times, monetary values, and percents. The annotation was performed on two datasets, one involving reannotating an existing NER dataset primarily composed of news texts and the other incorporating new texts from news and social media domains. Transformer-based models were trained on these annotated datasets to establish baseline predictive performance. Our findings indicate that the best results were achieved by training a single model on the combined dataset, suggesting that the domain differences between the datasets are relatively small.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sirts-2023-estonian">
<titleInfo>
<title>Estonian Named Entity Recognition: New Datasets and Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kairit</namePart>
<namePart type="family">Sirts</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tanel</namePart>
<namePart type="family">Alumäe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Fishel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>University of Tartu Library</publisher>
<place>
<placeTerm type="text">Tórshavn, Faroe Islands</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents the annotation process of two Estonian named entity recognition (NER) datasets, involving the creation of annotation guidelines for labeling eleven different types of entities. In addition to the commonly annotated entities such as person names, organization names, and locations, the annotation scheme encompasses geopolitical entities, product names, titles/roles, events, dates, times, monetary values, and percents. The annotation was performed on two datasets, one involving reannotating an existing NER dataset primarily composed of news texts and the other incorporating new texts from news and social media domains. Transformer-based models were trained on these annotated datasets to establish baseline predictive performance. Our findings indicate that the best results were achieved by training a single model on the combined dataset, suggesting that the domain differences between the datasets are relatively small.</abstract>
<identifier type="citekey">sirts-2023-estonian</identifier>
<location>
<url>https://aclanthology.org/2023.nodalida-1.76</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>752</start>
<end>761</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Estonian Named Entity Recognition: New Datasets and Models
%A Sirts, Kairit
%Y Alumäe, Tanel
%Y Fishel, Mark
%S Proceedings of the 24th Nordic Conference on Computational Linguistics (NoDaLiDa)
%D 2023
%8 May
%I University of Tartu Library
%C Tórshavn, Faroe Islands
%F sirts-2023-estonian
%X This paper presents the annotation process of two Estonian named entity recognition (NER) datasets, involving the creation of annotation guidelines for labeling eleven different types of entities. In addition to the commonly annotated entities such as person names, organization names, and locations, the annotation scheme encompasses geopolitical entities, product names, titles/roles, events, dates, times, monetary values, and percents. The annotation was performed on two datasets, one involving reannotating an existing NER dataset primarily composed of news texts and the other incorporating new texts from news and social media domains. Transformer-based models were trained on these annotated datasets to establish baseline predictive performance. Our findings indicate that the best results were achieved by training a single model on the combined dataset, suggesting that the domain differences between the datasets are relatively small.
%U https://aclanthology.org/2023.nodalida-1.76
%P 752-761
Markdown (Informal)
[Estonian Named Entity Recognition: New Datasets and Models](https://aclanthology.org/2023.nodalida-1.76) (Sirts, NoDaLiDa 2023)
ACL