% Workshop paper in the AfricaNLP 2025 proceedings (ACL Anthology ID 2025.africanlp-1.27).
% Case-sensitive words in title/booktitle are brace-protected as whole words.
@inproceedings{muhammad-etal-2025-hausanlp,
  title     = {{HausaNLP}: Current Status, Challenges and Future Directions for {Hausa} Natural Language Processing},
  author    = {Muhammad, Shamsuddeen Hassan and
               Ahmad, Ibrahim Said and
               Abdulmumin, Idris and
               Lawan, Falalu Ibrahim and
               Imam, Sukairaj Hafiz and
               Aliyu, Yusuf and
               Sani, Sani Abdullahi and
               Umar, Ali Usman and
               Gwadabe, Tajuddeen and
               Church, Kenneth and
               Marivate, Vukosi},
  editor    = {Lignos, Constantine and
               Abdulmumin, Idris and
               Adelani, David},
  booktitle = {Proceedings of the Sixth Workshop on {African} Natural Language Processing ({AfricaNLP} 2025)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.africanlp-1.27/},
  doi       = {10.18653/v1/2025.africanlp-1.27},
  pages     = {176--191},
  isbn      = {979-8-89176-257-2},
  abstract  = {Hausa Natural Language Processing (NLP) has gained increasing attention in recent years, yet remains understudied as a low-resource language despite having over 120 million first-language (L1) and 80 million second-language (L2) speakers worldwide. While significant advances have been made in high-resource languages, Hausa NLP faces persistent challenges including limited open-source datasets and inadequate model representation. This paper presents an overview of the current state of Hausa NLP, systematically examining existing resources, research contributions, and gaps across fundamental NLP tasks: text classification, machine translation, named entity recognition, speech recognition, and question answering. We introduce HausaNLP, a curated catalog that aggregates datasets, tools, and research works to enhance accessibility and drive further development. Furthermore, we discuss challenges in integrating Hausa into large language models (LLMs), addressing issues of suboptimal tokenization, and dialectal variation. Finally, we propose strategic research directions emphasizing dataset expansion, improved language modeling approaches, and strengthened community collaboration to advance Hausa NLP. Our work provides both a foundation for accelerating Hausa NLP progress and valuable insights for broader multilingual NLP research.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS v3 record for citekey muhammad-etal-2025-hausanlp.
       Top-level <name> elements are the paper's authors; the nested
       <relatedItem type="host"> describes the proceedings volume and its editors. -->
  <mods ID="muhammad-etal-2025-hausanlp">
    <titleInfo>
      <title>HausaNLP: Current Status, Challenges and Future Directions for Hausa Natural Language Processing</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Shamsuddeen</namePart>
      <namePart type="given">Hassan</namePart>
      <namePart type="family">Muhammad</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ibrahim</namePart>
      <namePart type="given">Said</namePart>
      <namePart type="family">Ahmad</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Idris</namePart>
      <namePart type="family">Abdulmumin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Falalu</namePart>
      <namePart type="given">Ibrahim</namePart>
      <namePart type="family">Lawan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sukairaj</namePart>
      <namePart type="given">Hafiz</namePart>
      <namePart type="family">Imam</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yusuf</namePart>
      <namePart type="family">Aliyu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sani</namePart>
      <namePart type="given">Abdullahi</namePart>
      <namePart type="family">Sani</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ali</namePart>
      <namePart type="given">Usman</namePart>
      <namePart type="family">Umar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tajuddeen</namePart>
      <namePart type="family">Gwadabe</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Kenneth</namePart>
      <namePart type="family">Church</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Vukosi</namePart>
      <namePart type="family">Marivate</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Sixth Workshop on African Natural Language Processing (AfricaNLP 2025)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Constantine</namePart>
        <namePart type="family">Lignos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Idris</namePart>
        <namePart type="family">Abdulmumin</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Adelani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-257-2</identifier>
    </relatedItem>
    <abstract>Hausa Natural Language Processing (NLP) has gained increasing attention in recent years, yet remains understudied as a low-resource language despite having over 120 million first-language (L1) and 80 million second-language (L2) speakers worldwide. While significant advances have been made in high-resource languages, Hausa NLP faces persistent challenges including limited open-source datasets and inadequate model representation. This paper presents an overview of the current state of Hausa NLP, systematically examining existing resources, research contributions, and gaps across fundamental NLP tasks: text classification, machine translation, named entity recognition, speech recognition, and question answering. We introduce HausaNLP, a curated catalog that aggregates datasets, tools, and research works to enhance accessibility and drive further development. Furthermore, we discuss challenges in integrating Hausa into large language models (LLMs), addressing issues of suboptimal tokenization, and dialectal variation. Finally, we propose strategic research directions emphasizing dataset expansion, improved language modeling approaches, and strengthened community collaboration to advance Hausa NLP. Our work provides both a foundation for accelerating Hausa NLP progress and valuable insights for broader multilingual NLP research.</abstract>
    <identifier type="citekey">muhammad-etal-2025-hausanlp</identifier>
    <identifier type="doi">10.18653/v1/2025.africanlp-1.27</identifier>
    <location>
      <url>https://aclanthology.org/2025.africanlp-1.27/</url>
    </location>
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>176</start>
        <end>191</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T HausaNLP: Current Status, Challenges and Future Directions for Hausa Natural Language Processing
%A Muhammad, Shamsuddeen Hassan
%A Ahmad, Ibrahim Said
%A Abdulmumin, Idris
%A Lawan, Falalu Ibrahim
%A Imam, Sukairaj Hafiz
%A Aliyu, Yusuf
%A Sani, Sani Abdullahi
%A Umar, Ali Usman
%A Gwadabe, Tajuddeen
%A Church, Kenneth
%A Marivate, Vukosi
%Y Lignos, Constantine
%Y Abdulmumin, Idris
%Y Adelani, David
%S Proceedings of the Sixth Workshop on African Natural Language Processing (AfricaNLP 2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-257-2
%F muhammad-etal-2025-hausanlp
%X Hausa Natural Language Processing (NLP) has gained increasing attention in recent years, yet remains understudied as a low-resource language despite having over 120 million first-language (L1) and 80 million second-language (L2) speakers worldwide. While significant advances have been made in high-resource languages, Hausa NLP faces persistent challenges including limited open-source datasets and inadequate model representation. This paper presents an overview of the current state of Hausa NLP, systematically examining existing resources, research contributions, and gaps across fundamental NLP tasks: text classification, machine translation, named entity recognition, speech recognition, and question answering. We introduce HausaNLP, a curated catalog that aggregates datasets, tools, and research works to enhance accessibility and drive further development. Furthermore, we discuss challenges in integrating Hausa into large language models (LLMs), addressing issues of suboptimal tokenization, and dialectal variation. Finally, we propose strategic research directions emphasizing dataset expansion, improved language modeling approaches, and strengthened community collaboration to advance Hausa NLP. Our work provides both a foundation for accelerating Hausa NLP progress and valuable insights for broader multilingual NLP research.
%R 10.18653/v1/2025.africanlp-1.27
%U https://aclanthology.org/2025.africanlp-1.27/
%U https://doi.org/10.18653/v1/2025.africanlp-1.27
%P 176-191
Markdown (Informal)
[HausaNLP: Current Status, Challenges and Future Directions for Hausa Natural Language Processing](https://aclanthology.org/2025.africanlp-1.27/) (Muhammad et al., AfricaNLP 2025)
ACL
- Shamsuddeen Hassan Muhammad, Ibrahim Said Ahmad, Idris Abdulmumin, Falalu Ibrahim Lawan, Sukairaj Hafiz Imam, Yusuf Aliyu, Sani Abdullahi Sani, Ali Usman Umar, Tajuddeen Gwadabe, Kenneth Church, and Vukosi Marivate. 2025. HausaNLP: Current Status, Challenges and Future Directions for Hausa Natural Language Processing. In Proceedings of the Sixth Workshop on African Natural Language Processing (AfricaNLP 2025), pages 176–191, Vienna, Austria. Association for Computational Linguistics.