@inproceedings{cabrera-diego-etal-2021-using,
title = "Using a Frustratingly Easy Domain and Tagset Adaptation for Creating {S}lavic Named Entity Recognition Systems",
author = "Cabrera-Diego, Luis Adri{\'a}n and
Moreno, Jose G. and
Doucet, Antoine",
editor = "Babych, Bogdan and
Kanishcheva, Olga and
Nakov, Preslav and
Piskorski, Jakub and
Pivovarova, Lidia and
Starko, Vasyl and
Steinberger, Josef and
Yangarber, Roman and
Marci{\'n}czuk, Micha{\l} and
Pollak, Senja and
P{\v{r}}ib{\'a}{\v{n}}, Pavel and
Robnik-{\v{S}}ikonja, Marko",
booktitle = "Proceedings of the 8th Workshop on Balto-Slavic Natural Language Processing",
month = apr,
year = "2021",
address = "Kyiv, Ukraine",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.bsnlp-1.12",
pages = "98--104",
abstract = "We present a collection of Named Entity Recognition (NER) systems for six Slavic languages: Bulgarian, Czech, Polish, Slovenian, Russian and Ukrainian. These NER systems have been trained using different BERT models and a Frustratingly Easy Domain Adaptation (FEDA). FEDA allow us creating NER systems using multiple datasets without having to worry about whether the tagset (e.g. Location, Event, Miscellaneous, Time) in the source and target domains match, while increasing the amount of data available for training. Moreover, we boosted the prediction on named entities by marking uppercase words and predicting masked words. Participating in the 3rd Shared Task on SlavNER, our NER systems reached a strict match micro F-score of up to 0.908. The results demonstrate good generalization, even in named entities with weak regularity, such as book titles, or entities that were never seen during the training.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cabrera-diego-etal-2021-using">
<titleInfo>
<title>Using a Frustratingly Easy Domain and Tagset Adaptation for Creating Slavic Named Entity Recognition Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="given">Adrián</namePart>
<namePart type="family">Cabrera-Diego</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jose</namePart>
<namePart type="given">G</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antoine</namePart>
<namePart type="family">Doucet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 8th Workshop on Balto-Slavic Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bogdan</namePart>
<namePart type="family">Babych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Olga</namePart>
<namePart type="family">Kanishcheva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jakub</namePart>
<namePart type="family">Piskorski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lidia</namePart>
<namePart type="family">Pivovarova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vasyl</namePart>
<namePart type="family">Starko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Josef</namePart>
<namePart type="family">Steinberger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roman</namePart>
<namePart type="family">Yangarber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michał</namePart>
<namePart type="family">Marcińczuk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Senja</namePart>
<namePart type="family">Pollak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pavel</namePart>
<namePart type="family">Přibáň</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="family">Robnik-Šikonja</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Kyiv, Ukraine</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a collection of Named Entity Recognition (NER) systems for six Slavic languages: Bulgarian, Czech, Polish, Slovenian, Russian and Ukrainian. These NER systems have been trained using different BERT models and a Frustratingly Easy Domain Adaptation (FEDA). FEDA allow us creating NER systems using multiple datasets without having to worry about whether the tagset (e.g. Location, Event, Miscellaneous, Time) in the source and target domains match, while increasing the amount of data available for training. Moreover, we boosted the prediction on named entities by marking uppercase words and predicting masked words. Participating in the 3rd Shared Task on SlavNER, our NER systems reached a strict match micro F-score of up to 0.908. The results demonstrate good generalization, even in named entities with weak regularity, such as book titles, or entities that were never seen during the training.</abstract>
<identifier type="citekey">cabrera-diego-etal-2021-using</identifier>
<location>
<url>https://aclanthology.org/2021.bsnlp-1.12</url>
</location>
<part>
<date>2021-04</date>
<extent unit="page">
<start>98</start>
<end>104</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Using a Frustratingly Easy Domain and Tagset Adaptation for Creating Slavic Named Entity Recognition Systems
%A Cabrera-Diego, Luis Adrián
%A Moreno, Jose G.
%A Doucet, Antoine
%Y Babych, Bogdan
%Y Kanishcheva, Olga
%Y Nakov, Preslav
%Y Piskorski, Jakub
%Y Pivovarova, Lidia
%Y Starko, Vasyl
%Y Steinberger, Josef
%Y Yangarber, Roman
%Y Marcińczuk, Michał
%Y Pollak, Senja
%Y Přibáň, Pavel
%Y Robnik-Šikonja, Marko
%S Proceedings of the 8th Workshop on Balto-Slavic Natural Language Processing
%D 2021
%8 April
%I Association for Computational Linguistics
%C Kyiv, Ukraine
%F cabrera-diego-etal-2021-using
%X We present a collection of Named Entity Recognition (NER) systems for six Slavic languages: Bulgarian, Czech, Polish, Slovenian, Russian and Ukrainian. These NER systems have been trained using different BERT models and a Frustratingly Easy Domain Adaptation (FEDA). FEDA allow us creating NER systems using multiple datasets without having to worry about whether the tagset (e.g. Location, Event, Miscellaneous, Time) in the source and target domains match, while increasing the amount of data available for training. Moreover, we boosted the prediction on named entities by marking uppercase words and predicting masked words. Participating in the 3rd Shared Task on SlavNER, our NER systems reached a strict match micro F-score of up to 0.908. The results demonstrate good generalization, even in named entities with weak regularity, such as book titles, or entities that were never seen during the training.
%U https://aclanthology.org/2021.bsnlp-1.12
%P 98-104
Markdown (Informal)
[Using a Frustratingly Easy Domain and Tagset Adaptation for Creating Slavic Named Entity Recognition Systems](https://aclanthology.org/2021.bsnlp-1.12) (Cabrera-Diego et al., BSNLP 2021)
ACL