@inproceedings{steimel-2018-part,
title = "Part of Speech Tagging in {L}uyia: A {B}antu Macrolanguage",
author = "Steimel, Kenneth",
editor = {Zampieri, Marcos and
Nakov, Preslav and
Ljube{\v{s}}i{\'c}, Nikola and
Tiedemann, J{\"o}rg and
Malmasi, Shervin and
Ali, Ahmed},
booktitle = "Proceedings of the Fifth Workshop on {NLP} for Similar Languages, Varieties and Dialects ({V}ar{D}ial 2018)",
month = aug,
year = "2018",
address = "Santa Fe, New Mexico, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-3905",
pages = "46--54",
abstract = "Luyia is a macrolanguage in central Kenya. The Luyia languages, like other Bantu languages, have a complex morphological system. This system can be leveraged to aid in part of speech tagging. Bag-of-characters taggers trained on a source Luyia language can be applied directly to another Luyia language with some degree of success. In addition, mixing data from the target language with data from the source language does produce more accurate predictive models compared to models trained on just the target language data when the training set size is small. However, for both of these tagging tasks, models involving the more distantly related language, Tiriki, are better at predicting part of speech tags for Wanga data. The models incorporating Bukusu data are not as successful despite the closer relationship between Bukusu and Wanga. Overlapping vocabulary between the Wanga and Tiriki corpora as well as a bias towards open class words help Tiriki outperform Bukusu.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="steimel-2018-part">
<titleInfo>
<title>Part of Speech Tagging in Luyia: A Bantu Macrolanguage</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kenneth</namePart>
<namePart type="family">Steimel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial 2018)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikola</namePart>
<namePart type="family">Ljubešić</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shervin</namePart>
<namePart type="family">Malmasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Ali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Santa Fe, New Mexico, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Luyia is a macrolanguage in central Kenya. The Luyia languages, like other Bantu languages, have a complex morphological system. This system can be leveraged to aid in part of speech tagging. Bag-of-characters taggers trained on a source Luyia language can be applied directly to another Luyia language with some degree of success. In addition, mixing data from the target language with data from the source language does produce more accurate predictive models compared to models trained on just the target language data when the training set size is small. However, for both of these tagging tasks, models involving the more distantly related language, Tiriki, are better at predicting part of speech tags for Wanga data. The models incorporating Bukusu data are not as successful despite the closer relationship between Bukusu and Wanga. Overlapping vocabulary between the Wanga and Tiriki corpora as well as a bias towards open class words help Tiriki outperform Bukusu.</abstract>
<identifier type="citekey">steimel-2018-part</identifier>
<location>
<url>https://aclanthology.org/W18-3905</url>
</location>
<part>
<date>2018-08</date>
<extent unit="page">
<start>46</start>
<end>54</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Part of Speech Tagging in Luyia: A Bantu Macrolanguage
%A Steimel, Kenneth
%Y Zampieri, Marcos
%Y Nakov, Preslav
%Y Ljubešić, Nikola
%Y Tiedemann, Jörg
%Y Malmasi, Shervin
%Y Ali, Ahmed
%S Proceedings of the Fifth Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial 2018)
%D 2018
%8 August
%I Association for Computational Linguistics
%C Santa Fe, New Mexico, USA
%F steimel-2018-part
%X Luyia is a macrolanguage in central Kenya. The Luyia languages, like other Bantu languages, have a complex morphological system. This system can be leveraged to aid in part of speech tagging. Bag-of-characters taggers trained on a source Luyia language can be applied directly to another Luyia language with some degree of success. In addition, mixing data from the target language with data from the source language does produce more accurate predictive models compared to models trained on just the target language data when the training set size is small. However, for both of these tagging tasks, models involving the more distantly related language, Tiriki, are better at predicting part of speech tags for Wanga data. The models incorporating Bukusu data are not as successful despite the closer relationship between Bukusu and Wanga. Overlapping vocabulary between the Wanga and Tiriki corpora as well as a bias towards open class words help Tiriki outperform Bukusu.
%U https://aclanthology.org/W18-3905
%P 46-54
Markdown (Informal)
[Part of Speech Tagging in Luyia: A Bantu Macrolanguage](https://aclanthology.org/W18-3905) (Steimel, VarDial 2018)
ACL