@inproceedings{f-p-dossou-aidasso-2025-towards,
title = "Towards Open-Ended Discovery for Low-Resource {NLP}",
author = {F. P. Dossou, Bonaventure and
A{\"i}dasso, Henri},
editor = "Noidea, Noidea",
booktitle = "Proceedings of the 2nd Workshop on Uncertainty-Aware NLP (UncertaiNLP 2025)",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.uncertainlp-main.24/",
pages = "287--297",
ISBN = "979-8-89176-349-4",
abstract = "Natural language processing (NLP) for low-resource languages remains fundamentally constrained by the lack of textual corpora, standardized orthographies, and scalable annotation pipelines. While recent advances in large language models have improved cross-lingual transfer, they remain inaccessible to underrepresented communities due to their reliance on massive, pre-collected data and centralized infrastructure. In this position paper, we argue for a paradigm shift toward open-ended, interactive language discovery, where AI systems learn new languages dynamically through dialogue rather than static datasets. We contend that the future of language technology, particularly for low-resource and under-documented languages, must move beyond static data collection pipelines toward interactive, uncertainty-driven discovery, where learning emerges dynamically from human-machine collaboration instead of being limited to pre-existing datasets. We propose a framework grounded in joint human-machine uncertainty, combining epistemic uncertainty from the model with hesitation cues and confidence signals from human speakers to guide interaction, query selection, and memory retention. This paper is a call to action: we advocate a rethinking of how AI engages with human knowledge in under-documented languages, moving from extractive data collection toward participatory, co-adaptive learning processes that respect and empower communities while discovering and preserving the world{'}s linguistic diversity. This vision aligns with principles of human-centered AI and participatory design, emphasizing interactive, cooperative model building between AI systems and speakers."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="f-p-dossou-aidasso-2025-towards">
<titleInfo>
<title>Towards Open-Ended Discovery for Low-Resource NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bonaventure</namePart>
<namePart type="family">F. P. Dossou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Henri</namePart>
<namePart type="family">Aïdasso</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Uncertainty-Aware NLP (UncertaiNLP 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Noidea</namePart>
<namePart type="family">Noidea</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-349-4</identifier>
</relatedItem>
<abstract>Natural language processing (NLP) for low-resource languages remains fundamentally constrained by the lack of textual corpora, standardized orthographies, and scalable annotation pipelines. While recent advances in large language models have improved cross-lingual transfer, they remain inaccessible to underrepresented communities due to their reliance on massive, pre-collected data and centralized infrastructure. In this position paper, we argue for a paradigm shift toward open-ended, interactive language discovery, where AI systems learn new languages dynamically through dialogue rather than static datasets. We contend that the future of language technology, particularly for low-resource and under-documented languages, must move beyond static data collection pipelines toward interactive, uncertainty-driven discovery, where learning emerges dynamically from human-machine collaboration instead of being limited to pre-existing datasets. We propose a framework grounded in joint human-machine uncertainty, combining epistemic uncertainty from the model with hesitation cues and confidence signals from human speakers to guide interaction, query selection, and memory retention. This paper is a call to action: we advocate a rethinking of how AI engages with human knowledge in under-documented languages, moving from extractive data collection toward participatory, co-adaptive learning processes that respect and empower communities while discovering and preserving the world’s linguistic diversity. This vision aligns with principles of human-centered AI and participatory design, emphasizing interactive, cooperative model building between AI systems and speakers.</abstract>
<identifier type="citekey">f-p-dossou-aidasso-2025-towards</identifier>
<location>
<url>https://aclanthology.org/2025.uncertainlp-main.24/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>287</start>
<end>297</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Open-Ended Discovery for Low-Resource NLP
%A F. P. Dossou, Bonaventure
%A Aïdasso, Henri
%Y Noidea, Noidea
%S Proceedings of the 2nd Workshop on Uncertainty-Aware NLP (UncertaiNLP 2025)
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-349-4
%F f-p-dossou-aidasso-2025-towards
%X Natural language processing (NLP) for low-resource languages remains fundamentally constrained by the lack of textual corpora, standardized orthographies, and scalable annotation pipelines. While recent advances in large language models have improved cross-lingual transfer, they remain inaccessible to underrepresented communities due to their reliance on massive, pre-collected data and centralized infrastructure. In this position paper, we argue for a paradigm shift toward open-ended, interactive language discovery, where AI systems learn new languages dynamically through dialogue rather than static datasets. We contend that the future of language technology, particularly for low-resource and under-documented languages, must move beyond static data collection pipelines toward interactive, uncertainty-driven discovery, where learning emerges dynamically from human-machine collaboration instead of being limited to pre-existing datasets. We propose a framework grounded in joint human-machine uncertainty, combining epistemic uncertainty from the model with hesitation cues and confidence signals from human speakers to guide interaction, query selection, and memory retention. This paper is a call to action: we advocate a rethinking of how AI engages with human knowledge in under-documented languages, moving from extractive data collection toward participatory, co-adaptive learning processes that respect and empower communities while discovering and preserving the world’s linguistic diversity. This vision aligns with principles of human-centered AI and participatory design, emphasizing interactive, cooperative model building between AI systems and speakers.
%U https://aclanthology.org/2025.uncertainlp-main.24/
%P 287-297
Markdown (Informal)
[Towards Open-Ended Discovery for Low-Resource NLP](https://aclanthology.org/2025.uncertainlp-main.24/) (F. P. Dossou & Aïdasso, UncertaiNLP 2025)
ACL
- Bonaventure F. P. Dossou and Henri Aïdasso. 2025. Towards Open-Ended Discovery for Low-Resource NLP. In Proceedings of the 2nd Workshop on Uncertainty-Aware NLP (UncertaiNLP 2025), pages 287–297, Suzhou, China. Association for Computational Linguistics.