% Conference paper record (inproceedings) for the NSL-MT paper, Findings of ACL 2026.
% NOTE(review): `address` presumably holds the conference location rather than
% the publisher's city (ACL Anthology export habit) -- confirm before reuse.
@inproceedings{keita-etal-2026-nsl,
    title     = {{NSL-MT}: Linguistically Informed Negative Samples for Efficient Machine Translation in {African} Low-Resource Languages},
    author    = {Keita, Mamadou K. and
                 Homan, Christopher M. and
                 Le, Huy},
    editor    = {Liakata, Maria and
                 Moreira, Viviane P. and
                 Zhang, Jiajun and
                 Jurgens, David},
    booktitle = {Findings of the {Association} for {Computational} {Linguistics}: {ACL} 2026},
    month     = jul,
    year      = {2026},
    address   = {San Diego, California, United States},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2026.findings-acl.465/},
    pages     = {9545--9560},
    isbn      = {979-8-89176-395-1},
    abstract  = {We introduce negative space learning machine translation (NSL-MT), a training method for underresourced languages, that augments limited parallel data with synthetically generated violations of the target language{'}s grammar and explicitly penalizes the model when it assigns high probability to these linguistically invalid outputs. NSL-MT delivers improvements across all baselines we tested, including 3-12{\%} BLEU gains for well-performing models and 56-89{\%} gains for models lacking decent initial support. Furthermore, NSL-MT provides a 5x data efficiency multiplier: training with 1,000 examples matches or exceeds normal training with 5,000 examples. NSL-MT thus provides a data-efficient alternative training method for settings where parallel data is limited.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID attribute matches the citekey identifier further down. -->
  <mods ID="keita-etal-2026-nsl">
    <titleInfo>
      <title>NSL-MT: Linguistically Informed Negative Samples for Efficient Machine Translation in African Low-Resource Languages</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Mamadou</namePart>
      <namePart type="given">K</namePart>
      <namePart type="family">Keita</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Christopher</namePart>
      <namePart type="given">M</namePart>
      <namePart type="family">Homan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Huy</namePart>
      <namePart type="family">Le</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="family">Liakata</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Viviane</namePart>
        <namePart type="given">P</namePart>
        <namePart type="family">Moreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jiajun</namePart>
        <namePart type="family">Zhang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">David</namePart>
        <namePart type="family">Jurgens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">San Diego, California, United States</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-395-1</identifier>
    </relatedItem>
    <abstract>We introduce negative space learning machine translation (NSL-MT), a training method for underresourced languages, that augments limited parallel data with synthetically generated violations of the target language’s grammar and explicitly penalizes the model when it assigns high probability to these linguistically invalid outputs. NSL-MT delivers improvements across all baselines we tested, including 3-12% BLEU gains for well-performing models and 56-89% gains for models lacking decent initial support. Furthermore, NSL-MT provides a 5x data efficiency multiplier: training with 1,000 examples matches or exceeds normal training with 5,000 examples. NSL-MT thus provides a data-efficient alternative training method for settings where parallel data is limited.</abstract>
    <identifier type="citekey">keita-etal-2026-nsl</identifier>
    <location>
      <url>https://aclanthology.org/2026.findings-acl.465/</url>
    </location>
    <!-- Position of the paper inside the host volume. -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>9545</start>
        <end>9560</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T NSL-MT: Linguistically Informed Negative Samples for Efficient Machine Translation in African Low-Resource Languages
%A Keita, Mamadou K.
%A Homan, Christopher M.
%A Le, Huy
%Y Liakata, Maria
%Y Moreira, Viviane P.
%Y Zhang, Jiajun
%Y Jurgens, David
%S Findings of the Association for Computational Linguistics: ACL 2026
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, California, United States
%@ 979-8-89176-395-1
%F keita-etal-2026-nsl
%X We introduce negative space learning machine translation (NSL-MT), a training method for underresourced languages, that augments limited parallel data with synthetically generated violations of the target language’s grammar and explicitly penalizes the model when it assigns high probability to these linguistically invalid outputs. NSL-MT delivers improvements across all baselines we tested, including 3-12% BLEU gains for well-performing models and 56-89% gains for models lacking decent initial support. Furthermore, NSL-MT provides a 5x data efficiency multiplier: training with 1,000 examples matches or exceeds normal training with 5,000 examples. NSL-MT thus provides a data-efficient alternative training method for settings where parallel data is limited.
%U https://aclanthology.org/2026.findings-acl.465/
%P 9545-9560
Markdown (Informal)
[NSL-MT: Linguistically Informed Negative Samples for Efficient Machine Translation in African Low-Resource Languages](https://aclanthology.org/2026.findings-acl.465/) (Keita et al., Findings 2026)
ACL