% BibTeX entry for citation key umar-etal-2026-thesis.
% Case-sensitive words in title/booktitle are brace-protected as whole words
% so that sentence-casing styles keep their capitals.
@inproceedings{umar-etal-2026-thesis,
  title     = {Thesis Proposal: Self-Adaptive and Epistemic Uncertainty-Guided {ASR} of Dense Intra-Sentential Code-Switched Speech for {African} Low-Resource Languages},
  author    = {Umar, Umar Baba and
               Bashir, Sulaimon Adebayo and
               Mohammed, Abdulmalik Danlami and
               Tafida, Amina Gogo},
  editor    = {T.Y.S.S., Santosh and
               Rodriguez, Juan Diego and
               de Gibert, Ona},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} ({ACL} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.acl-srw.67/},
  pages     = {754--763},
  isbn      = {979-8-89176-393-7},
  abstract  = {Automatic Speech Recognition (ASR) has achieved strong performance for high-resource languages, but dense intra-sentential code-switched speech in African low-resource settings remains underexplored. Existing multilingual and pretrained ASR systems improve general recognition accuracy, yet they remain weak at switch regions, are sensitive to language imbalance during adaptation, and are typically evaluated with metrics that obscure switching-specific errors. This thesis proposes a self-adaptive and epistemic uncertainty-guided framework for African low-resource code-switched ASR, using Hausa{--}English (Engausa) and Hausa{--}Yor{\`u}b{\'a} as case studies. The work investigates three linked questions: (1) how to design a linguistically informed code-switched corpus with explicit switch-region annotation and labeled/unlabeled partitions for adaptive learning, (2) whether epistemic uncertainty is systematically elevated around switch regions and can guide pseudo-label selection in semi-supervised training, and (3) whether switch-aware adaptation with auxiliary language identification and boundary supervision can reduce recognition errors without increasing catastrophic forgetting. The long-term goal is to develop scalable and data-efficient ASR systems that model code-switching as a structured linguistic phenomenon rather than as noise in multilingual African speech.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record (ID umar-etal-2026-thesis). -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="umar-etal-2026-thesis">
    <titleInfo>
      <title>Thesis Proposal: Self-Adaptive and Epistemic Uncertainty-Guided ASR of Dense Intra-Sentential Code-Switched Speech for African Low-Resource Languages</title>
    </titleInfo>
    <!-- Authors, in the order listed. -->
    <name type="personal">
      <namePart type="given">Umar</namePart>
      <namePart type="given">Baba</namePart>
      <namePart type="family">Umar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sulaimon</namePart>
      <namePart type="given">Adebayo</namePart>
      <namePart type="family">Bashir</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Abdulmalik</namePart>
      <namePart type="given">Danlami</namePart>
      <namePart type="family">Mohammed</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Amina</namePart>
      <namePart type="given">Gogo</namePart>
      <namePart type="family">Tafida</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Santosh</namePart>
        <namePart type="family">T.Y.S.S.</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Juan</namePart>
        <namePart type="given">Diego</namePart>
        <namePart type="family">Rodriguez</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ona</namePart>
        <namePart type="family">de Gibert</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-393-7</identifier>
    </relatedItem>
    <abstract>Automatic Speech Recognition (ASR) has achieved strong performance for high-resource languages, but dense intra-sentential code-switched speech in African low-resource settings remains underexplored. Existing multilingual and pretrained ASR systems improve general recognition accuracy, yet they remain weak at switch regions, are sensitive to language imbalance during adaptation, and are typically evaluated with metrics that obscure switching-specific errors. This thesis proposes a self-adaptive and epistemic uncertainty-guided framework for African low-resource code-switched ASR, using Hausa–English (Engausa) and Hausa–Yorùbá as case studies. The work investigates three linked questions: (1) how to design a linguistically informed code-switched corpus with explicit switch-region annotation and labeled/unlabeled partitions for adaptive learning, (2) whether epistemic uncertainty is systematically elevated around switch regions and can guide pseudo-label selection in semi-supervised training, and (3) whether switch-aware adaptation with auxiliary language identification and boundary supervision can reduce recognition errors without increasing catastrophic forgetting. The long-term goal is to develop scalable and data-efficient ASR systems that model code-switching as a structured linguistic phenomenon rather than as noise in multilingual African speech.</abstract>
    <identifier type="citekey">umar-etal-2026-thesis</identifier>
    <location>
      <url>https://aclanthology.org/2026.acl-srw.67/</url>
    </location>
    <!-- Date and page extent of this paper. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>754</start>
        <end>763</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Thesis Proposal: Self-Adaptive and Epistemic Uncertainty-Guided ASR of Dense Intra-Sentential Code-Switched Speech for African Low-Resource Languages
%A Umar, Umar Baba
%A Bashir, Sulaimon Adebayo
%A Mohammed, Abdulmalik Danlami
%A Tafida, Amina Gogo
%Y T.Y.S.S., Santosh
%Y Rodriguez, Juan Diego
%Y de Gibert, Ona
%S Proceedings of the 64th Annual Meeting of the Association for Computational Linguistics (ACL 2026)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-393-7
%F umar-etal-2026-thesis
%X Automatic Speech Recognition (ASR) has achieved strong performance for high-resource languages, but dense intra-sentential code-switched speech in African low-resource settings remains underexplored. Existing multilingual and pretrained ASR systems improve general recognition accuracy, yet they remain weak at switch regions, are sensitive to language imbalance during adaptation, and are typically evaluated with metrics that obscure switching-specific errors. This thesis proposes a self-adaptive and epistemic uncertainty-guided framework for African low-resource code-switched ASR, using Hausa–English (Engausa) and Hausa–Yorùbá as case studies. The work investigates three linked questions: (1) how to design a linguistically informed code-switched corpus with explicit switch-region annotation and labeled/unlabeled partitions for adaptive learning, (2) whether epistemic uncertainty is systematically elevated around switch regions and can guide pseudo-label selection in semi-supervised training, and (3) whether switch-aware adaptation with auxiliary language identification and boundary supervision can reduce recognition errors without increasing catastrophic forgetting. The long-term goal is to develop scalable and data-efficient ASR systems that model code-switching as a structured linguistic phenomenon rather than as noise in multilingual African speech.
%U https://aclanthology.org/2026.acl-srw.67/
%P 754-763
Markdown (Informal)
[Thesis Proposal: Self-Adaptive and Epistemic Uncertainty-Guided ASR of Dense Intra-Sentential Code-Switched Speech for African Low-Resource Languages](https://aclanthology.org/2026.acl-srw.67/) (Umar et al., ACL 2026)
ACL