@inproceedings{steimel-etal-2023-towards,
title = "Towards a {S}wahili {U}niversal {D}ependency Treebank: Leveraging the Annotations of the {H}elsinki Corpus of {S}wahili",
author = {Steimel, Kenneth and
K{\"u}bler, Sandra},
editor = "Mabuya, Rooweither and
Mthobela, Don and
Setaka, Mmasibidi and
Van Zaanen, Menno",
booktitle = "Proceedings of the Fourth workshop on Resources for African Indigenous Languages (RAIL 2023)",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.rail-1.10",
doi = "10.18653/v1/2023.rail-1.10",
pages = "86--96",
abstract = "Dependency annotation can be a laborious process for under-resourced languages. However, in some cases, other resources are available. We investigate whether we can leverage such resources in the case of Swahili: We use the Helsinki Corpus of Swahili for creating a Universal Dependencies treebank for Swahili. The Helsinki Corpus of Swahili provides word-level annotations for part of speech tags, morphological features, and functional syntactic tags. We train neural taggers for these types of annotations, then use those models to annotate our target corpus, the Swahili portion of the OPUS Global Voices Corpus. Based on those annotations, we then manually create constraint grammar rules to annotate the target corpus for Universal Dependencies. In this paper, we describe the process, discuss the annotation decisions we had to make, and we evaluate the approach.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="steimel-etal-2023-towards">
<titleInfo>
<title>Towards a Swahili Universal Dependency Treebank: Leveraging the Annotations of the Helsinki Corpus of Swahili</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kenneth</namePart>
<namePart type="family">Steimel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sandra</namePart>
<namePart type="family">Kübler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth workshop on Resources for African Indigenous Languages (RAIL 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rooweither</namePart>
<namePart type="family">Mabuya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Don</namePart>
<namePart type="family">Mthobela</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mmasibidi</namePart>
<namePart type="family">Setaka</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Menno</namePart>
<namePart type="family">Van Zaanen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Dependency annotation can be a laborious process for under-resourced languages. However, in some cases, other resources are available. We investigate whether we can leverage such resources in the case of Swahili: We use the Helsinki Corpus of Swahili for creating a Universal Dependencies treebank for Swahili. The Helsinki Corpus of Swahili provides word-level annotations for part of speech tags, morphological features, and functional syntactic tags. We train neural taggers for these types of annotations, then use those models to annotate our target corpus, the Swahili portion of the OPUS Global Voices Corpus. Based on those annotations, we then manually create constraint grammar rules to annotate the target corpus for Universal Dependencies. In this paper, we describe the process, discuss the annotation decisions we had to make, and we evaluate the approach.</abstract>
<identifier type="citekey">steimel-etal-2023-towards</identifier>
<identifier type="doi">10.18653/v1/2023.rail-1.10</identifier>
<location>
<url>https://aclanthology.org/2023.rail-1.10</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>86</start>
<end>96</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards a Swahili Universal Dependency Treebank: Leveraging the Annotations of the Helsinki Corpus of Swahili
%A Steimel, Kenneth
%A Kübler, Sandra
%Y Mabuya, Rooweither
%Y Mthobela, Don
%Y Setaka, Mmasibidi
%Y Van Zaanen, Menno
%S Proceedings of the Fourth workshop on Resources for African Indigenous Languages (RAIL 2023)
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F steimel-etal-2023-towards
%X Dependency annotation can be a laborious process for under-resourced languages. However, in some cases, other resources are available. We investigate whether we can leverage such resources in the case of Swahili: We use the Helsinki Corpus of Swahili for creating a Universal Dependencies treebank for Swahili. The Helsinki Corpus of Swahili provides word-level annotations for part of speech tags, morphological features, and functional syntactic tags. We train neural taggers for these types of annotations, then use those models to annotate our target corpus, the Swahili portion of the OPUS Global Voices Corpus. Based on those annotations, we then manually create constraint grammar rules to annotate the target corpus for Universal Dependencies. In this paper, we describe the process, discuss the annotation decisions we had to make, and we evaluate the approach.
%R 10.18653/v1/2023.rail-1.10
%U https://aclanthology.org/2023.rail-1.10
%U https://doi.org/10.18653/v1/2023.rail-1.10
%P 86-96
Markdown (Informal)
[Towards a Swahili Universal Dependency Treebank: Leveraging the Annotations of the Helsinki Corpus of Swahili](https://aclanthology.org/2023.rail-1.10) (Steimel & Kübler, RAIL 2023)
ACL