@inproceedings{ion-etal-2024-cross,
  title     = {A Cross-model Study on Learning {Romanian} Parts of Speech with Transformer Models},
  author    = {Ion, Radu and
               Barbu Mititelu, Verginica and
               P{\u{a}}i{\c{s}}, Vasile and
               Irimia, Elena and
               Badea, Valentin},
  booktitle = {Proceedings of the Sixth International Conference on Computational Linguistics in Bulgaria (CLIB 2024)},
  month     = sep,
  year      = {2024},
  address   = {Sofia, Bulgaria},
  publisher = {Department of Computational Linguistics, Institute for Bulgarian Language, Bulgarian Academy of Sciences},
  url       = {https://aclanthology.org/2024.clib-1.1},
  pages     = {6--13},
  abstract  = {This paper will attempt to determine experimentally if POS tagging of unseen words produces comparable performance, in terms of accuracy, as for words that were rarely seen in the training set (i.e. frequency less than 5), or more frequently seen (i.e. frequency greater than 10). To compare accuracies objectively, we will use the odds ratio statistic and its confidence interval testing to show that odds of being correct on unseen words are close to odds of being correct on rarely seen words. For the training of the POS taggers, we use different Romanian BERT models that are freely available on HuggingFace.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ion-etal-2024-cross">
<titleInfo>
<title>A Cross-model Study on Learning Romanian Parts of Speech with Transformer Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Radu</namePart>
<namePart type="family">Ion</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Verginica</namePart>
<namePart type="family">Barbu Mititelu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vasile</namePart>
<namePart type="family">Păiş</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elena</namePart>
<namePart type="family">Irimia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valentin</namePart>
<namePart type="family">Badea</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth International Conference on Computational Linguistics in Bulgaria (CLIB 2024)</title>
</titleInfo>
<originInfo>
<publisher>Department of Computational Linguistics, Institute for Bulgarian Language, Bulgarian Academy of Sciences</publisher>
<place>
<placeTerm type="text">Sofia, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper will attempt to determine experimentally if POS tagging of unseen words produces comparable performance, in terms of accuracy, as for words that were rarely seen in the training set (i.e. frequency less than 5), or more frequently seen (i.e. frequency greater than 10). To compare accuracies objectively, we will use the odds ratio statistic and its confidence interval testing to show that odds of being correct on unseen words are close to odds of being correct on rarely seen words. For the training of the POS taggers, we use different Romanian BERT models that are freely available on HuggingFace.</abstract>
<identifier type="citekey">ion-etal-2024-cross</identifier>
<location>
<url>https://aclanthology.org/2024.clib-1.1</url>
</location>
<part>
<date>2024-09</date>
<extent unit="page">
<start>6</start>
<end>13</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Cross-model Study on Learning Romanian Parts of Speech with Transformer Models
%A Ion, Radu
%A Barbu Mititelu, Verginica
%A Păiş, Vasile
%A Irimia, Elena
%A Badea, Valentin
%S Proceedings of the Sixth International Conference on Computational Linguistics in Bulgaria (CLIB 2024)
%D 2024
%8 September
%I Department of Computational Linguistics, Institute for Bulgarian Language, Bulgarian Academy of Sciences
%C Sofia, Bulgaria
%F ion-etal-2024-cross
%X This paper will attempt to determine experimentally if POS tagging of unseen words produces comparable performance, in terms of accuracy, as for words that were rarely seen in the training set (i.e. frequency less than 5), or more frequently seen (i.e. frequency greater than 10). To compare accuracies objectively, we will use the odds ratio statistic and its confidence interval testing to show that odds of being correct on unseen words are close to odds of being correct on rarely seen words. For the training of the POS taggers, we use different Romanian BERT models that are freely available on HuggingFace.
%U https://aclanthology.org/2024.clib-1.1
%P 6-13
Markdown (Informal)
[A Cross-model Study on Learning Romanian Parts of Speech with Transformer Models](https://aclanthology.org/2024.clib-1.1) (Ion et al., CLIB 2024)
ACL
- Radu Ion, Verginica Barbu Mititelu, Vasile Păiş, Elena Irimia, and Valentin Badea. 2024. A Cross-model Study on Learning Romanian Parts of Speech with Transformer Models. In Proceedings of the Sixth International Conference on Computational Linguistics in Bulgaria (CLIB 2024), pages 6–13, Sofia, Bulgaria. Department of Computational Linguistics, Institute for Bulgarian Language, Bulgarian Academy of Sciences.