@inproceedings{mahlaza-etal-2025-isizulu,
title = "{I}si{Z}ulu noun classification based on replicating the ensemble approach for {R}unyankore",
author = "Mahlaza, Zola and
Keet, C. Maria and
Sayed, Imaan and
Van Der Leek, Alexander",
editor = "Hettiarachchi, Hansi and
Ranasinghe, Tharindu and
Rayson, Paul and
Mitkov, Ruslan and
Gaber, Mohamed and
Premasiri, Damith and
Tan, Fiona Anting and
Uyangodage, Lasitha",
booktitle = "Proceedings of the First Workshop on Language Models for Low-Resource Languages",
month = jan,
year = "2025",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.loreslm-1.35/",
pages = "469--478",
abstract = "A noun`s class is a crucial component in NLP, because it governs agreement across the sentence in Niger Congo B (NCB) languages, among others. The phenomenon is ill-documented in most NCB languages, or in a non-reusable format, such as a printed dictionary subject to copyright restrictions. A promising approach by Byamugisha (2022) used a data-driven approach for Runyankore that combined syntax and semantics. The code and data are inaccessible however, and it remains to be seen whether it is suitable for other NCB languages. We aimed to reproduce Byamugisha`s experiment, but then for isiZulu. We conducted this as two independent experiments, so that we also could subject it to a meta-analysis. Results showed that it was reproducible only in part, mainly due to imprecision in the original description, and the current impossibility to generate the same kind of source data set generated from an existing grammar. The different choices made in attempting to reproduce the pipeline as well as differences in choice of training and test data had a large effect on the eventual accuracy of noun class disambiguation but could produce accuracies in the same range as for Runyankore: 80-85{\%}."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mahlaza-etal-2025-isizulu">
<titleInfo>
<title>IsiZulu noun classification based on replicating the ensemble approach for Runyankore</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zola</namePart>
<namePart type="family">Mahlaza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">C</namePart>
<namePart type="given">Maria</namePart>
<namePart type="family">Keet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Imaan</namePart>
<namePart type="family">Sayed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Van Der Leek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-01</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Language Models for Low-Resource Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hansi</namePart>
<namePart type="family">Hettiarachchi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tharindu</namePart>
<namePart type="family">Ranasinghe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Rayson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Gaber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Damith</namePart>
<namePart type="family">Premasiri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fiona</namePart>
<namePart type="given">Anting</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lasitha</namePart>
<namePart type="family">Uyangodage</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A noun‘s class is a crucial component in NLP, because it governs agreement across the sentence in Niger Congo B (NCB) languages, among others. The phenomenon is ill-documented in most NCB languages, or in a non-reusable format, such as a printed dictionary subject to copyright restrictions. A promising approach by Byamugisha (2022) used a data-driven approach for Runyankore that combined syntax and semantics. The code and data are inaccessible however, and it remains to be seen whether it is suitable for other NCB languages. We aimed to reproduce Byamugisha‘s experiment, but then for isiZulu. We conducted this as two independent experiments, so that we also could subject it to a meta-analysis. Results showed that it was reproducible only in part, mainly due to imprecision in the original description, and the current impossibility to generate the same kind of source data set generated from an existing grammar. The different choices made in attempting to reproduce the pipeline as well as differences in choice of training and test data had a large effect on the eventual accuracy of noun class disambiguation but could produce accuracies in the same range as for Runyankore: 80-85%.</abstract>
<identifier type="citekey">mahlaza-etal-2025-isizulu</identifier>
<location>
<url>https://aclanthology.org/2025.loreslm-1.35/</url>
</location>
<part>
<date>2025-01</date>
<extent unit="page">
<start>469</start>
<end>478</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T IsiZulu noun classification based on replicating the ensemble approach for Runyankore
%A Mahlaza, Zola
%A Keet, C. Maria
%A Sayed, Imaan
%A Van Der Leek, Alexander
%Y Hettiarachchi, Hansi
%Y Ranasinghe, Tharindu
%Y Rayson, Paul
%Y Mitkov, Ruslan
%Y Gaber, Mohamed
%Y Premasiri, Damith
%Y Tan, Fiona Anting
%Y Uyangodage, Lasitha
%S Proceedings of the First Workshop on Language Models for Low-Resource Languages
%D 2025
%8 January
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F mahlaza-etal-2025-isizulu
%X A noun‘s class is a crucial component in NLP, because it governs agreement across the sentence in Niger Congo B (NCB) languages, among others. The phenomenon is ill-documented in most NCB languages, or in a non-reusable format, such as a printed dictionary subject to copyright restrictions. A promising approach by Byamugisha (2022) used a data-driven approach for Runyankore that combined syntax and semantics. The code and data are inaccessible however, and it remains to be seen whether it is suitable for other NCB languages. We aimed to reproduce Byamugisha‘s experiment, but then for isiZulu. We conducted this as two independent experiments, so that we also could subject it to a meta-analysis. Results showed that it was reproducible only in part, mainly due to imprecision in the original description, and the current impossibility to generate the same kind of source data set generated from an existing grammar. The different choices made in attempting to reproduce the pipeline as well as differences in choice of training and test data had a large effect on the eventual accuracy of noun class disambiguation but could produce accuracies in the same range as for Runyankore: 80-85%.
%U https://aclanthology.org/2025.loreslm-1.35/
%P 469-478
Markdown (Informal)
[IsiZulu noun classification based on replicating the ensemble approach for Runyankore](https://aclanthology.org/2025.loreslm-1.35/) (Mahlaza et al., LoResLM 2025)
ACL