@inproceedings{nsumba-etal-2026-salt,
title = "{SALT}-31: A Machine Translation Benchmark Dataset for 31 Ugandan Languages",
author = "Nsumba, Solomon and
Akera, Benjamin and
Ouma, Evelyn Nafula and
Ssentanda, Medadi E. and
Kawalya, Deo and
Bainomugisha, Engineer and
Mwebaze, Ernest Tonny and
Quinn, John",
editor = "Chimoto, Everlyn Asiko and
Lignos, Constantine and
Muhammad, Shamsuddeen and
Abdulmumin, Idris and
Siro, Clemencia and
Adelani, David Ifeoluwa",
booktitle = "Proceedings of the 7th Workshop on {A}frican Natural Language Processing ({A}frica{NLP} 2026)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.africanlp-main.21/",
pages = "211--216",
ISBN = "979-8-89176-364-7",
abstract = "We present the SALT-31 benchmark dataset for evaluation of machine translation models covering 31 Ugandan languages. Unlike sentence-level evaluation sets, SALT-31 is constructed from short, scenario-driven mini-dialogues designed to preserve discourse context, pragmatics, and culturally grounded communication patterns common in everyday Ugandan settings. The dataset contains 100 English sentences organized into 20 typical communication scenarios, each represented as a five-sentence mini-sequence. It can therefore be used to evaluate both sentence-level and paragraph-level machine translation, and includes nearly every language spoken in a country with high linguistic diversity. It is available at https://huggingface.co/datasets/Sunbird/salt-31"
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nsumba-etal-2026-salt">
<titleInfo>
<title>SALT-31: A Machine Translation Benchmark Dataset for 31 Ugandan Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Solomon</namePart>
<namePart type="family">Nsumba</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benjamin</namePart>
<namePart type="family">Akera</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Evelyn</namePart>
<namePart type="given">Nafula</namePart>
<namePart type="family">Ouma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Medadi</namePart>
<namePart type="given">E</namePart>
<namePart type="family">Ssentanda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Deo</namePart>
<namePart type="family">Kawalya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Engineer</namePart>
<namePart type="family">Bainomugisha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ernest</namePart>
<namePart type="given">Tonny</namePart>
<namePart type="family">Mwebaze</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Quinn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 7th Workshop on African Natural Language Processing (AfricaNLP 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Everlyn</namePart>
<namePart type="given">Asiko</namePart>
<namePart type="family">Chimoto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Constantine</namePart>
<namePart type="family">Lignos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shamsuddeen</namePart>
<namePart type="family">Muhammad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Idris</namePart>
<namePart type="family">Abdulmumin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Clemencia</namePart>
<namePart type="family">Siro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="given">Ifeoluwa</namePart>
<namePart type="family">Adelani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-364-7</identifier>
</relatedItem>
<abstract>We present the SALT-31 benchmark dataset for evaluation of machine translation models covering 31 Ugandan languages. Unlike sentence-level evaluation sets, SALT-31 is constructed from short, scenario-driven mini-dialogues designed to preserve discourse context, pragmatics, and culturally grounded communication patterns common in everyday Ugandan settings. The dataset contains 100 English sentences organized into 20 typical communication scenarios, each represented as a five-sentence mini-sequence. It can therefore be used to evaluate both sentence-level and paragraph-level machine translation, and includes nearly every language spoken in a country with high linguistic diversity. It is available at https://huggingface.co/datasets/Sunbird/salt-31</abstract>
<identifier type="citekey">nsumba-etal-2026-salt</identifier>
<location>
<url>https://aclanthology.org/2026.africanlp-main.21/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>211</start>
<end>216</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SALT-31: A Machine Translation Benchmark Dataset for 31 Ugandan Languages
%A Nsumba, Solomon
%A Akera, Benjamin
%A Ouma, Evelyn Nafula
%A Ssentanda, Medadi E.
%A Kawalya, Deo
%A Bainomugisha, Engineer
%A Mwebaze, Ernest Tonny
%A Quinn, John
%Y Chimoto, Everlyn Asiko
%Y Lignos, Constantine
%Y Muhammad, Shamsuddeen
%Y Abdulmumin, Idris
%Y Siro, Clemencia
%Y Adelani, David Ifeoluwa
%S Proceedings of the 7th Workshop on African Natural Language Processing (AfricaNLP 2026)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-364-7
%F nsumba-etal-2026-salt
%X We present the SALT-31 benchmark dataset for evaluation of machine translation models covering 31 Ugandan languages. Unlike sentence-level evaluation sets, SALT-31 is constructed from short, scenario-driven mini-dialogues designed to preserve discourse context, pragmatics, and culturally grounded communication patterns common in everyday Ugandan settings. The dataset contains 100 English sentences organized into 20 typical communication scenarios, each represented as a five-sentence mini-sequence. It can therefore be used to evaluate both sentence-level and paragraph-level machine translation, and includes nearly every language spoken in a country with high linguistic diversity. It is available at https://huggingface.co/datasets/Sunbird/salt-31
%U https://aclanthology.org/2026.africanlp-main.21/
%P 211-216
Markdown (Informal)
[SALT-31: A Machine Translation Benchmark Dataset for 31 Ugandan Languages](https://aclanthology.org/2026.africanlp-main.21/) (Nsumba et al., AfricaNLP 2026)
ACL
- Solomon Nsumba, Benjamin Akera, Evelyn Nafula Ouma, Medadi E. Ssentanda, Deo Kawalya, Engineer Bainomugisha, Ernest Tonny Mwebaze, and John Quinn. 2026. SALT-31: A Machine Translation Benchmark Dataset for 31 Ugandan Languages. In Proceedings of the 7th Workshop on African Natural Language Processing (AfricaNLP 2026), pages 211–216, Rabat, Morocco. Association for Computational Linguistics.