@inproceedings{vandenbulcke-etal-2024-recipe,
title = "Recipe for Zero-shot {POS} Tagging: Is It Useful in Realistic Scenarios?",
author = "Vandenbulcke, Zeno and
Vermeire, Lukas and
de Lhoneux, Miryam",
editor = {S{\"a}lev{\"a}, Jonne and
Owodunni, Abraham},
booktitle = "Proceedings of the Fourth Workshop on Multilingual Representation Learning (MRL 2024)",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.mrl-1.9",
pages = "137--147",
abstract = "POS tagging plays a fundamental role in numerous applications. While POS taggers are highly accurate in well-resourced settings, they lag behind in cases of limited or missing training data. This paper focuses on POS tagging for languages with limited data. We seek to identify favourable characteristics of datasets for training POS tagging models using related languages without specific training on the target language. This is a zero-shot approach. We investigate both mono- and multilingual models trained on related languages and compare their accuracies. Additionally, we compare these results with models trained directly on the target language itself. We do this for three target low-resource languages, for each of which we select several support languages. Our research highlights the importance of accurate dataset selection for developing effective zero-shot POS tagging models. Particularly, a strong linguistic relationship and high-quality datasets ensure optimal results. For extremely low-resource languages, zero-shot training proves to be a viable option.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vandenbulcke-etal-2024-recipe">
<titleInfo>
<title>Recipe for Zero-shot POS Tagging: Is It Useful in Realistic Scenarios?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zeno</namePart>
<namePart type="family">Vandenbulcke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lukas</namePart>
<namePart type="family">Vermeire</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Miryam</namePart>
<namePart type="family">de Lhoneux</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Multilingual Representation Learning (MRL 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jonne</namePart>
<namePart type="family">Sälevä</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abraham</namePart>
<namePart type="family">Owodunni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>POS tagging plays a fundamental role in numerous applications. While POS taggers are highly accurate in well-resourced settings, they lag behind in cases of limited or missing training data. This paper focuses on POS tagging for languages with limited data. We seek to identify favourable characteristics of datasets for training POS tagging models using related languages without specific training on the target language. This is a zero-shot approach. We investigate both mono- and multilingual models trained on related languages and compare their accuracies. Additionally, we compare these results with models trained directly on the target language itself. We do this for three target low-resource languages, for each of which we select several support languages. Our research highlights the importance of accurate dataset selection for developing effective zero-shot POS tagging models. Particularly, a strong linguistic relationship and high-quality datasets ensure optimal results. For extremely low-resource languages, zero-shot training proves to be a viable option.</abstract>
<identifier type="citekey">vandenbulcke-etal-2024-recipe</identifier>
<location>
<url>https://aclanthology.org/2024.mrl-1.9</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>137</start>
<end>147</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Recipe for Zero-shot POS Tagging: Is It Useful in Realistic Scenarios?
%A Vandenbulcke, Zeno
%A Vermeire, Lukas
%A de Lhoneux, Miryam
%Y Sälevä, Jonne
%Y Owodunni, Abraham
%S Proceedings of the Fourth Workshop on Multilingual Representation Learning (MRL 2024)
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F vandenbulcke-etal-2024-recipe
%X POS tagging plays a fundamental role in numerous applications. While POS taggers are highly accurate in well-resourced settings, they lag behind in cases of limited or missing training data. This paper focuses on POS tagging for languages with limited data. We seek to identify favourable characteristics of datasets for training POS tagging models using related languages without specific training on the target language. This is a zero-shot approach. We investigate both mono- and multilingual models trained on related languages and compare their accuracies. Additionally, we compare these results with models trained directly on the target language itself. We do this for three target low-resource languages, for each of which we select several support languages. Our research highlights the importance of accurate dataset selection for developing effective zero-shot POS tagging models. Particularly, a strong linguistic relationship and high-quality datasets ensure optimal results. For extremely low-resource languages, zero-shot training proves to be a viable option.
%U https://aclanthology.org/2024.mrl-1.9
%P 137-147
Markdown (Informal)
[Recipe for Zero-shot POS Tagging: Is It Useful in Realistic Scenarios?](https://aclanthology.org/2024.mrl-1.9) (Vandenbulcke et al., MRL 2024)
ACL