@inproceedings{lee-bloem-2023-comparing,
title = "Comparing domain-specific and domain-general {BERT} variants for inferred real-world knowledge through rare grammatical features in {S}erbian",
author = "Lee, Sofia and
Bloem, Jelke",
editor = "Piskorski, Jakub and
Marci{\'n}czuk, Micha{\l} and
Nakov, Preslav and
Ogrodniczuk, Maciej and
Pollak, Senja and
P{\v{r}}ib{\'a}{\v{n}}, Pavel and
Rybak, Piotr and
Steinberger, Josef and
Yangarber, Roman",
booktitle = "Proceedings of the 9th Workshop on Slavic Natural Language Processing 2023 (SlavicNLP 2023)",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.bsnlp-1.7",
doi = "10.18653/v1/2023.bsnlp-1.7",
pages = "47--60",
abstract = "Transfer learning is one of the prevailing approaches towards training language-specific BERT models. However, some languages have uncommon features that may prove to be challenging to more domain-general models but not domain-specific models. Comparing the performance of BERTi{\'c}, a Bosnian-Croatian-Montenegrin-Serbian model, and Multilingual BERT on a Named-Entity Recognition (NER) task and Masked Language Modelling (MLM) task based around a rare phenomenon of indeclinable female foreign names in Serbian reveals how the different training approaches impacts their performance. Multilingual BERT is shown to perform better than BERTi{\'c} in the NER task, but BERTi{\'c} greatly exceeds in the MLM task. Thus, there are applications both for domain-general training and domain-specific training depending on the tasks at hand.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lee-bloem-2023-comparing">
<titleInfo>
<title>Comparing domain-specific and domain-general BERT variants for inferred real-world knowledge through rare grammatical features in Serbian</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sofia</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jelke</namePart>
<namePart type="family">Bloem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 9th Workshop on Slavic Natural Language Processing 2023 (SlavicNLP 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jakub</namePart>
<namePart type="family">Piskorski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michał</namePart>
<namePart type="family">Marcińczuk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maciej</namePart>
<namePart type="family">Ogrodniczuk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Senja</namePart>
<namePart type="family">Pollak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pavel</namePart>
<namePart type="family">Přibáň</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Piotr</namePart>
<namePart type="family">Rybak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Josef</namePart>
<namePart type="family">Steinberger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roman</namePart>
<namePart type="family">Yangarber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Transfer learning is one of the prevailing approaches towards training language-specific BERT models. However, some languages have uncommon features that may prove challenging for more domain-general models but not for domain-specific models. Comparing the performance of BERTić, a Bosnian-Croatian-Montenegrin-Serbian model, and Multilingual BERT on a Named-Entity Recognition (NER) task and a Masked Language Modelling (MLM) task built around a rare phenomenon of indeclinable female foreign names in Serbian reveals how the different training approaches impact their performance. Multilingual BERT is shown to perform better than BERTić in the NER task, but BERTić greatly exceeds it in the MLM task. Thus, there are applications for both domain-general and domain-specific training, depending on the task at hand.</abstract>
<identifier type="citekey">lee-bloem-2023-comparing</identifier>
<identifier type="doi">10.18653/v1/2023.bsnlp-1.7</identifier>
<location>
<url>https://aclanthology.org/2023.bsnlp-1.7</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>47</start>
<end>60</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Comparing domain-specific and domain-general BERT variants for inferred real-world knowledge through rare grammatical features in Serbian
%A Lee, Sofia
%A Bloem, Jelke
%Y Piskorski, Jakub
%Y Marcińczuk, Michał
%Y Nakov, Preslav
%Y Ogrodniczuk, Maciej
%Y Pollak, Senja
%Y Přibáň, Pavel
%Y Rybak, Piotr
%Y Steinberger, Josef
%Y Yangarber, Roman
%S Proceedings of the 9th Workshop on Slavic Natural Language Processing 2023 (SlavicNLP 2023)
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F lee-bloem-2023-comparing
%X Transfer learning is one of the prevailing approaches towards training language-specific BERT models. However, some languages have uncommon features that may prove challenging for more domain-general models but not for domain-specific models. Comparing the performance of BERTić, a Bosnian-Croatian-Montenegrin-Serbian model, and Multilingual BERT on a Named-Entity Recognition (NER) task and a Masked Language Modelling (MLM) task built around a rare phenomenon of indeclinable female foreign names in Serbian reveals how the different training approaches impact their performance. Multilingual BERT is shown to perform better than BERTić in the NER task, but BERTić greatly exceeds it in the MLM task. Thus, there are applications for both domain-general and domain-specific training, depending on the task at hand.
%R 10.18653/v1/2023.bsnlp-1.7
%U https://aclanthology.org/2023.bsnlp-1.7
%U https://doi.org/10.18653/v1/2023.bsnlp-1.7
%P 47-60
Markdown (Informal)
[Comparing domain-specific and domain-general BERT variants for inferred real-world knowledge through rare grammatical features in Serbian](https://aclanthology.org/2023.bsnlp-1.7) (Lee & Bloem, BSNLP 2023)
ACL
Sofia Lee and Jelke Bloem. 2023. Comparing domain-specific and domain-general BERT variants for inferred real-world knowledge through rare grammatical features in Serbian. In Proceedings of the 9th Workshop on Slavic Natural Language Processing 2023 (SlavicNLP 2023), pages 47–60, Dubrovnik, Croatia. Association for Computational Linguistics.
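
For readers who want to try an MLM-style probe like the one the abstract describes, here is a minimal sketch using the Hugging Face transformers fill-mask pipeline. It is not the authors' experimental code: the checkpoint bert-base-multilingual-cased is the standard Multilingual BERT release, the Serbian example sentence is invented for illustration, and since BERTić is an ELECTRA-style model without a stock fill-mask head, only the multilingual model is probed here.

```python
# Minimal sketch of a masked-language-modelling probe (not the paper's code).
# Assumes the standard Hugging Face checkpoint "bert-base-multilingual-cased";
# the Serbian example sentence is invented for illustration.
from transformers import pipeline

mbert = pipeline("fill-mask", model="bert-base-multilingual-cased")

# An indeclinable female foreign name would appear where [MASK] is;
# the probe asks which surface forms the model prefers in that slot.
sentence = "Razgovarao sam sa [MASK] o novoj knjizi."

for pred in mbert(sentence, top_k=5):
    print(f"{pred['token_str']:>12}  p={pred['score']:.3f}")
```

Inspecting whether the predicted forms respect or violate the indeclinability of the masked name is one way to turn such predictions into the kind of grammatical-knowledge comparison the paper reports.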