BibTeX
@inproceedings{singh-etal-2023-many,
    title = "Too Many Cooks Spoil the Model: Are Bilingual Models for {S}lovene Better than a Large Multilingual Model?",
    author = "Singh, Pranaydeep  and
      Maladry, Aaron  and
      Lefever, Els",
    editor = "Piskorski, Jakub  and
      Marci{\'n}czuk, Micha{\l}  and
      Nakov, Preslav  and
      Ogrodniczuk, Maciej  and
      Pollak, Senja  and
      P{\v{r}}ib{\'a}{\v{n}}, Pavel  and
      Rybak, Piotr  and
      Steinberger, Josef  and
      Yangarber, Roman",
    booktitle = "Proceedings of the 9th Workshop on Slavic Natural Language Processing 2023 (SlavicNLP 2023)",
    month = may,
    year = "2023",
    address = "Dubrovnik, Croatia",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.bsnlp-1.5",
    doi = "10.18653/v1/2023.bsnlp-1.5",
    pages = "32--39",
    abstract = "This paper investigates whether adding data of typologically closer languages improves the performance of transformer-based models for three different downstream tasks, namely Part-of-Speech tagging, Named Entity Recognition, and Sentiment Analysis, compared to a monolingual and a plain multilingual language model. For the presented pilot study, we performed experiments for the use case of Slovene, a low(er)-resourced language belonging to the Slavic language family. The experiments were carried out in a controlled setting, where a monolingual model for Slovene was compared to combined language models containing Slovene, trained with the same amount of Slovene data. The experimental results show that adding typologically closer languages indeed improves the performance of the Slovene language model, and even succeeds in outperforming the large multilingual XLM-RoBERTa model for NER and PoS-tagging. We also reveal that, contrary to intuition, distantly related or unrelated languages also combine admirably with Slovene, often outperforming XLM-R as well. All the bilingual models used in the experiments are publicly available at \url{https://github.com/pranaydeeps/BLAIR}.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="singh-etal-2023-many">
    <titleInfo>
      <title>Too Many Cooks Spoil the Model: Are Bilingual Models for Slovene Better than a Large Multilingual Model?</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Pranaydeep</namePart>
      <namePart type="family">Singh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Aaron</namePart>
      <namePart type="family">Maladry</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Els</namePart>
      <namePart type="family">Lefever</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 9th Workshop on Slavic Natural Language Processing 2023 (SlavicNLP 2023)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Jakub</namePart>
        <namePart type="family">Piskorski</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Michał</namePart>
        <namePart type="family">Marcińczuk</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Preslav</namePart>
        <namePart type="family">Nakov</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maciej</namePart>
        <namePart type="family">Ogrodniczuk</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Senja</namePart>
        <namePart type="family">Pollak</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Pavel</namePart>
        <namePart type="family">Přibáň</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Piotr</namePart>
        <namePart type="family">Rybak</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Josef</namePart>
        <namePart type="family">Steinberger</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Roman</namePart>
        <namePart type="family">Yangarber</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Dubrovnik, Croatia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper investigates whether adding data of typologically closer languages improves the performance of transformer-based models for three different downstream tasks, namely Part-of-Speech tagging, Named Entity Recognition, and Sentiment Analysis, compared to a monolingual and a plain multilingual language model. For the presented pilot study, we performed experiments for the use case of Slovene, a low(er)-resourced language belonging to the Slavic language family. The experiments were carried out in a controlled setting, where a monolingual model for Slovene was compared to combined language models containing Slovene, trained with the same amount of Slovene data. The experimental results show that adding typologically closer languages indeed improves the performance of the Slovene language model, and even succeeds in outperforming the large multilingual XLM-RoBERTa model for NER and PoS-tagging. We also reveal that, contrary to intuition, distantly related or unrelated languages also combine admirably with Slovene, often outperforming XLM-R as well. All the bilingual models used in the experiments are publicly available at https://github.com/pranaydeeps/BLAIR.</abstract>
    <identifier type="citekey">singh-etal-2023-many</identifier>
    <identifier type="doi">10.18653/v1/2023.bsnlp-1.5</identifier>
    <location>
      <url>https://aclanthology.org/2023.bsnlp-1.5</url>
    </location>
    <part>
      <date>2023-05</date>
      <extent unit="page">
        <start>32</start>
        <end>39</end>
      </extent>
    </part>
  </mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Too Many Cooks Spoil the Model: Are Bilingual Models for Slovene Better than a Large Multilingual Model?
%A Singh, Pranaydeep
%A Maladry, Aaron
%A Lefever, Els
%Y Piskorski, Jakub
%Y Marcińczuk, Michał
%Y Nakov, Preslav
%Y Ogrodniczuk, Maciej
%Y Pollak, Senja
%Y Přibáň, Pavel
%Y Rybak, Piotr
%Y Steinberger, Josef
%Y Yangarber, Roman
%S Proceedings of the 9th Workshop on Slavic Natural Language Processing 2023 (SlavicNLP 2023)
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F singh-etal-2023-many
%X This paper investigates whether adding data of typologically closer languages improves the performance of transformer-based models for three different downstream tasks, namely Part-of-Speech tagging, Named Entity Recognition, and Sentiment Analysis, compared to a monolingual and a plain multilingual language model. For the presented pilot study, we performed experiments for the use case of Slovene, a low(er)-resourced language belonging to the Slavic language family. The experiments were carried out in a controlled setting, where a monolingual model for Slovene was compared to combined language models containing Slovene, trained with the same amount of Slovene data. The experimental results show that adding typologically closer languages indeed improves the performance of the Slovene language model, and even succeeds in outperforming the large multilingual XLM-RoBERTa model for NER and PoS-tagging. We also reveal that, contrary to intuition, distantly related or unrelated languages also combine admirably with Slovene, often outperforming XLM-R as well. All the bilingual models used in the experiments are publicly available at https://github.com/pranaydeeps/BLAIR.
%R 10.18653/v1/2023.bsnlp-1.5
%U https://aclanthology.org/2023.bsnlp-1.5
%U https://doi.org/10.18653/v1/2023.bsnlp-1.5
%P 32-39
Markdown (Informal)
[Too Many Cooks Spoil the Model: Are Bilingual Models for Slovene Better than a Large Multilingual Model?](https://aclanthology.org/2023.bsnlp-1.5) (Singh et al., BSNLP 2023)
ACL
Pranaydeep Singh, Aaron Maladry, and Els Lefever. 2023. Too Many Cooks Spoil the Model: Are Bilingual Models for Slovene Better than a Large Multilingual Model?. In Proceedings of the 9th Workshop on Slavic Natural Language Processing 2023 (SlavicNLP 2023), pages 32–39, Dubrovnik, Croatia. Association for Computational Linguistics.