@inproceedings{mahlaza-etal-2024-automatically-generating,
title = "Automatically Generating {I}si{Z}ulu Words From {I}ndo-{A}rabic Numerals",
author = "Mahlaza, Zola and
Magwenzi, Tadiwa and
Keet, C. Maria and
Khumalo, Langa",
editor = "Mahamood, Saad and
Minh, Nguyen Le and
Ippolito, Daphne",
booktitle = "Proceedings of the 17th International Natural Language Generation Conference",
month = sep,
year = "2024",
address = "Tokyo, Japan",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.inlg-main.21",
pages = "254--271",
abstract = "Artificial conversational agents are deployed to assist humans in a variety of tasks. Some of these tasks require the capability to communicate numbers as part of their internal and abstract representations of meaning, such as for banking and scheduling appointments. They currently cannot do so for isiZulu because there are no algorithms to do so due to a lack of speech and text data and the transformation is complex and it may include dependence on the type of noun that is counted. We solved this by extracting and iteratively improving on the rules for speaking and writing numerals as words and creating two algorithms to automate the transformation. Evaluation of the algorithms by two isiZulu grammarians showed that six out of seven number categories were 90-100{\%} correct. The same software was used with an additional set of rules to create a large monolingual text corpus, made up of 771 643 sentences, to enable future data-driven approaches.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mahlaza-etal-2024-automatically-generating">
<titleInfo>
<title>Automatically Generating IsiZulu Words From Indo-Arabic Numerals</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zola</namePart>
<namePart type="family">Mahlaza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tadiwa</namePart>
<namePart type="family">Magwenzi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">C</namePart>
<namePart type="given">Maria</namePart>
<namePart type="family">Keet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Langa</namePart>
<namePart type="family">Khumalo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Natural Language Generation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saad</namePart>
<namePart type="family">Mahamood</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nguyen</namePart>
<namePart type="given">Le</namePart>
<namePart type="family">Minh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daphne</namePart>
<namePart type="family">Ippolito</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Tokyo, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Artificial conversational agents are deployed to assist humans in a variety of tasks. Some of these tasks require the capability to communicate numbers as part of their internal and abstract representations of meaning, such as for banking and scheduling appointments. They currently cannot do so for isiZulu because there are no algorithms to do so due to a lack of speech and text data and the transformation is complex and it may include dependence on the type of noun that is counted. We solved this by extracting and iteratively improving on the rules for speaking and writing numerals as words and creating two algorithms to automate the transformation. Evaluation of the algorithms by two isiZulu grammarians showed that six out of seven number categories were 90-100% correct. The same software was used with an additional set of rules to create a large monolingual text corpus, made up of 771 643 sentences, to enable future data-driven approaches.</abstract>
<identifier type="citekey">mahlaza-etal-2024-automatically-generating</identifier>
<location>
<url>https://aclanthology.org/2024.inlg-main.21</url>
</location>
<part>
<date>2024-09</date>
<extent unit="page">
<start>254</start>
<end>271</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatically Generating IsiZulu Words From Indo-Arabic Numerals
%A Mahlaza, Zola
%A Magwenzi, Tadiwa
%A Keet, C. Maria
%A Khumalo, Langa
%Y Mahamood, Saad
%Y Minh, Nguyen Le
%Y Ippolito, Daphne
%S Proceedings of the 17th International Natural Language Generation Conference
%D 2024
%8 September
%I Association for Computational Linguistics
%C Tokyo, Japan
%F mahlaza-etal-2024-automatically-generating
%X Artificial conversational agents are deployed to assist humans in a variety of tasks. Some of these tasks require the capability to communicate numbers as part of their internal and abstract representations of meaning, such as for banking and scheduling appointments. They currently cannot do so for isiZulu because there are no algorithms to do so due to a lack of speech and text data and the transformation is complex and it may include dependence on the type of noun that is counted. We solved this by extracting and iteratively improving on the rules for speaking and writing numerals as words and creating two algorithms to automate the transformation. Evaluation of the algorithms by two isiZulu grammarians showed that six out of seven number categories were 90-100% correct. The same software was used with an additional set of rules to create a large monolingual text corpus, made up of 771 643 sentences, to enable future data-driven approaches.
%U https://aclanthology.org/2024.inlg-main.21
%P 254-271
Markdown (Informal)
[Automatically Generating IsiZulu Words From Indo-Arabic Numerals](https://aclanthology.org/2024.inlg-main.21) (Mahlaza et al., INLG 2024)
ACL