@inproceedings{alvarez-c-etal-2025-advancing,
title = "Advancing Uto-Aztecan Language Technologies: A Case Study on the Endangered {C}omanche Language",
author = "Alvarez C, Jesus and
Karajeanes, Daua and
Prado, Ashley and
Ruttan, John and
Yang, Ivory and
O{'}brien, Sean and
Sharma, Vasu and
Zhu, Kevin",
editor = "Mager, Manuel and
Ebrahimi, Abteen and
Pugh, Robert and
Rijhwani, Shruti and
Von Der Wense, Katharina and
Chiruzzo, Luis and
Coto-Solano, Rolando and
Oncevay, Arturo",
booktitle = "Proceedings of the Fifth Workshop on NLP for Indigenous Languages of the Americas (AmericasNLP)",
month = may,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.americasnlp-1.4/",
doi = "10.18653/v1/2025.americasnlp-1.4",
pages = "27--37",
ISBN = "979-8-89176-236-7",
abstract = "The digital exclusion of endangered languages remains a critical challenge in NLP, limiting both linguistic research and revitalization efforts. This study introduces the first computational investigation of Comanche, an Uto-Aztecan language on the verge of extinction, demonstrating how minimal-cost, community-informed NLP interventions can support language preservation. We present a manually curated dataset of 412 phrases, a synthetic data generation pipeline, and an empirical evaluation of GPT-4o and GPT-4o-mini for language identification. Our experiments reveal that while LLMs struggle with Comanche in zero-shot settings, few-shot prompting significantly improves performance, achieving near-perfect accuracy with just five examples. Our findings highlight the potential of targeted NLP methodologies in low-resource contexts and emphasize that visibility is the first step toward inclusion. By establishing a foundation for Comanche in NLP, we advocate for computational approaches that prioritize accessibility, cultural sensitivity, and community engagement."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alvarez-c-etal-2025-advancing">
<titleInfo>
<title>Advancing Uto-Aztecan Language Technologies: A Case Study on the Endangered Comanche Language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jesus</namePart>
<namePart type="family">Alvarez C</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daua</namePart>
<namePart type="family">Karajeanes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashley</namePart>
<namePart type="family">Prado</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="family">Ruttan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivory</namePart>
<namePart type="family">Yang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sean</namePart>
<namePart type="family">O’brien</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vasu</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Zhu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on NLP for Indigenous Languages of the Americas (AmericasNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manuel</namePart>
<namePart type="family">Mager</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abteen</namePart>
<namePart type="family">Ebrahimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="family">Pugh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shruti</namePart>
<namePart type="family">Rijhwani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katharina</namePart>
<namePart type="family">Von Der Wense</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Chiruzzo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rolando</namePart>
<namePart type="family">Coto-Solano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arturo</namePart>
<namePart type="family">Oncevay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-236-7</identifier>
</relatedItem>
<abstract>The digital exclusion of endangered languages remains a critical challenge in NLP, limiting both linguistic research and revitalization efforts. This study introduces the first computational investigation of Comanche, an Uto-Aztecan language on the verge of extinction, demonstrating how minimal-cost, community-informed NLP interventions can support language preservation. We present a manually curated dataset of 412 phrases, a synthetic data generation pipeline, and an empirical evaluation of GPT-4o and GPT-4o-mini for language identification. Our experiments reveal that while LLMs struggle with Comanche in zero-shot settings, few-shot prompting significantly improves performance, achieving near-perfect accuracy with just five examples. Our findings highlight the potential of targeted NLP methodologies in low-resource contexts and emphasize that visibility is the first step toward inclusion. By establishing a foundation for Comanche in NLP, we advocate for computational approaches that prioritize accessibility, cultural sensitivity, and community engagement.</abstract>
<identifier type="citekey">alvarez-c-etal-2025-advancing</identifier>
<identifier type="doi">10.18653/v1/2025.americasnlp-1.4</identifier>
<location>
<url>https://aclanthology.org/2025.americasnlp-1.4/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>27</start>
<end>37</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Advancing Uto-Aztecan Language Technologies: A Case Study on the Endangered Comanche Language
%A Alvarez C, Jesus
%A Karajeanes, Daua
%A Prado, Ashley
%A Ruttan, John
%A Yang, Ivory
%A O’brien, Sean
%A Sharma, Vasu
%A Zhu, Kevin
%Y Mager, Manuel
%Y Ebrahimi, Abteen
%Y Pugh, Robert
%Y Rijhwani, Shruti
%Y Von Der Wense, Katharina
%Y Chiruzzo, Luis
%Y Coto-Solano, Rolando
%Y Oncevay, Arturo
%S Proceedings of the Fifth Workshop on NLP for Indigenous Languages of the Americas (AmericasNLP)
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-236-7
%F alvarez-c-etal-2025-advancing
%X The digital exclusion of endangered languages remains a critical challenge in NLP, limiting both linguistic research and revitalization efforts. This study introduces the first computational investigation of Comanche, an Uto-Aztecan language on the verge of extinction, demonstrating how minimal-cost, community-informed NLP interventions can support language preservation. We present a manually curated dataset of 412 phrases, a synthetic data generation pipeline, and an empirical evaluation of GPT-4o and GPT-4o-mini for language identification. Our experiments reveal that while LLMs struggle with Comanche in zero-shot settings, few-shot prompting significantly improves performance, achieving near-perfect accuracy with just five examples. Our findings highlight the potential of targeted NLP methodologies in low-resource contexts and emphasize that visibility is the first step toward inclusion. By establishing a foundation for Comanche in NLP, we advocate for computational approaches that prioritize accessibility, cultural sensitivity, and community engagement.
%R 10.18653/v1/2025.americasnlp-1.4
%U https://aclanthology.org/2025.americasnlp-1.4/
%U https://doi.org/10.18653/v1/2025.americasnlp-1.4
%P 27-37
Markdown (Informal)
[Advancing Uto-Aztecan Language Technologies: A Case Study on the Endangered Comanche Language](https://aclanthology.org/2025.americasnlp-1.4/) (Alvarez C et al., AmericasNLP 2025)
ACL
- Jesus Alvarez C, Daua Karajeanes, Ashley Prado, John Ruttan, Ivory Yang, Sean O’brien, Vasu Sharma, and Kevin Zhu. 2025. Advancing Uto-Aztecan Language Technologies: A Case Study on the Endangered Comanche Language. In Proceedings of the Fifth Workshop on NLP for Indigenous Languages of the Americas (AmericasNLP), pages 27–37, Albuquerque, New Mexico. Association for Computational Linguistics.