@inproceedings{yahan-islam-2025-harnessing,
title = "Harnessing {NLP} for Indigenous Language Education: Fine-Tuning Large Language Models for Sentence Transformation",
author = "Yahan, Mahshar and
Islam, Dr. Mohammad",
editor = "Mager, Manuel and
Ebrahimi, Abteen and
Pugh, Robert and
Rijhwani, Shruti and
Von Der Wense, Katharina and
Chiruzzo, Luis and
Coto-Solano, Rolando and
Oncevay, Arturo",
booktitle = "Proceedings of the Fifth Workshop on NLP for Indigenous Languages of the Americas (AmericasNLP)",
month = may,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.americasnlp-1.14/",
doi = "10.18653/v1/2025.americasnlp-1.14",
pages = "119--125",
ISBN = "979-8-89176-236-7",
abstract = "Indigenous languages face significant challenges due to their endangered status and limited resources which makes their integration into NLP systems difficult. This study investigates the use of Large Language Models (LLMs) for sentence transformation tasks in Indigenous languages, focusing on Bribri, Guarani, and Maya. Here, the dataset from the AmericasNLP 2025 Shared Task 2 is used to explore sentence transformations in Indigenous languages. The goal is to create educational tools by modifying sentences based on linguistic instructions, such as changes in tense, aspect, voice, person, and other grammatical features. The methodology involves preprocessing data, simplifying transformation tags, and designing zero-shot and few-shot prompts to guide LLMs in sentence rewriting. Fine-tuning techniques like LoRA and Bits-and-Bytes quantization were employed to optimize model performance while reducing computational costs. Among the tested models, Llama 3.2(3B-Instruct) demonstrated superior performance across all languages with high BLEU and ChrF++ scores, particularly excelling in few-shot settings. The Llama 3.2 model achieved BLEU scores of 19.51 for Bribri, 13.67 for Guarani, and 55.86 for Maya in test settings. Additionally, ChrF++ scores reached 50.29 for Bribri, 58.55 for Guarani, and 80.12 for Maya, showcasing its effectiveness in handling sentence transformation. These results highlight the potential of LLMs that can improve NLP tools for indigenous languages and help preserve linguistic diversity."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yahan-islam-2025-harnessing">
<titleInfo>
<title>Harnessing NLP for Indigenous Language Education: Fine-Tuning Large Language Models for Sentence Transformation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mahshar</namePart>
<namePart type="family">Yahan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dr.</namePart>
<namePart type="given">Mohammad</namePart>
<namePart type="family">Islam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Workshop on NLP for Indigenous Languages of the Americas (AmericasNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manuel</namePart>
<namePart type="family">Mager</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abteen</namePart>
<namePart type="family">Ebrahimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="family">Pugh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shruti</namePart>
<namePart type="family">Rijhwani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katharina</namePart>
<namePart type="family">Von Der Wense</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Chiruzzo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rolando</namePart>
<namePart type="family">Coto-Solano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arturo</namePart>
<namePart type="family">Oncevay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-236-7</identifier>
</relatedItem>
<abstract>Indigenous languages face significant challenges due to their endangered status and limited resources which makes their integration into NLP systems difficult. This study investigates the use of Large Language Models (LLMs) for sentence transformation tasks in Indigenous languages, focusing on Bribri, Guarani, and Maya. Here, the dataset from the AmericasNLP 2025 Shared Task 2 is used to explore sentence transformations in Indigenous languages. The goal is to create educational tools by modifying sentences based on linguistic instructions, such as changes in tense, aspect, voice, person, and other grammatical features. The methodology involves preprocessing data, simplifying transformation tags, and designing zero-shot and few-shot prompts to guide LLMs in sentence rewriting. Fine-tuning techniques like LoRA and Bits-and-Bytes quantization were employed to optimize model performance while reducing computational costs. Among the tested models, Llama 3.2(3B-Instruct) demonstrated superior performance across all languages with high BLEU and ChrF++ scores, particularly excelling in few-shot settings. The Llama 3.2 model achieved BLEU scores of 19.51 for Bribri, 13.67 for Guarani, and 55.86 for Maya in test settings. Additionally, ChrF++ scores reached 50.29 for Bribri, 58.55 for Guarani, and 80.12 for Maya, showcasing its effectiveness in handling sentence transformation. These results highlight the potential of LLMs that can improve NLP tools for indigenous languages and help preserve linguistic diversity.</abstract>
<identifier type="citekey">yahan-islam-2025-harnessing</identifier>
<identifier type="doi">10.18653/v1/2025.americasnlp-1.14</identifier>
<location>
<url>https://aclanthology.org/2025.americasnlp-1.14/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>119</start>
<end>125</end>
</extent>
</part>
</mods>
</modsCollection>

%0 Conference Proceedings
%T Harnessing NLP for Indigenous Language Education: Fine-Tuning Large Language Models for Sentence Transformation
%A Yahan, Mahshar
%A Islam, Dr. Mohammad
%Y Mager, Manuel
%Y Ebrahimi, Abteen
%Y Pugh, Robert
%Y Rijhwani, Shruti
%Y Von Der Wense, Katharina
%Y Chiruzzo, Luis
%Y Coto-Solano, Rolando
%Y Oncevay, Arturo
%S Proceedings of the Fifth Workshop on NLP for Indigenous Languages of the Americas (AmericasNLP)
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-236-7
%F yahan-islam-2025-harnessing
%X Indigenous languages face significant challenges due to their endangered status and limited resources, which make their integration into NLP systems difficult. This study investigates the use of Large Language Models (LLMs) for sentence transformation tasks in Indigenous languages, focusing on Bribri, Guarani, and Maya. The dataset from the AmericasNLP 2025 Shared Task 2 is used to explore sentence transformations in these languages. The goal is to create educational tools by modifying sentences according to linguistic instructions, such as changes in tense, aspect, voice, person, and other grammatical features. The methodology involves preprocessing the data, simplifying transformation tags, and designing zero-shot and few-shot prompts to guide LLMs in sentence rewriting. Fine-tuning techniques such as LoRA and Bits-and-Bytes quantization were employed to optimize model performance while reducing computational costs. Among the tested models, Llama 3.2 (3B-Instruct) demonstrated superior performance across all languages, with high BLEU and ChrF++ scores, particularly in few-shot settings. The Llama 3.2 model achieved BLEU scores of 19.51 for Bribri, 13.67 for Guarani, and 55.86 for Maya on the test set. ChrF++ scores reached 50.29 for Bribri, 58.55 for Guarani, and 80.12 for Maya, showcasing its effectiveness in handling sentence transformation. These results highlight the potential of LLMs to improve NLP tools for Indigenous languages and help preserve linguistic diversity.
%R 10.18653/v1/2025.americasnlp-1.14
%U https://aclanthology.org/2025.americasnlp-1.14/
%U https://doi.org/10.18653/v1/2025.americasnlp-1.14
%P 119-125

Markdown (Informal)
[Harnessing NLP for Indigenous Language Education: Fine-Tuning Large Language Models for Sentence Transformation](https://aclanthology.org/2025.americasnlp-1.14/) (Yahan & Islam, AmericasNLP 2025)
ACL
Mahshar Yahan and Dr. Mohammad Islam. 2025. Harnessing NLP for Indigenous Language Education: Fine-Tuning Large Language Models for Sentence Transformation. In Proceedings of the Fifth Workshop on NLP for Indigenous Languages of the Americas (AmericasNLP), pages 119–125, Albuquerque, New Mexico. Association for Computational Linguistics.
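
As context for the techniques named in the abstract (LoRA adapters and Bits-and-Bytes 4-bit quantization applied to Llama 3.2 3B-Instruct), the sketch below shows how such a setup is commonly assembled with the Hugging Face transformers and peft libraries. It is a minimal illustration only: the checkpoint identifier, LoRA rank, target modules, and other hyperparameters are assumptions for demonstration, not values reported in the paper.

```python
# Illustrative sketch (not the authors' code): load an instruction-tuned
# Llama model with 4-bit Bits-and-Bytes quantization and attach a LoRA
# adapter, as the abstract describes doing to reduce fine-tuning cost.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

model_id = "meta-llama/Llama-3.2-3B-Instruct"  # assumed checkpoint name

# 4-bit NF4 quantization keeps the 3B model within a single modest GPU.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)

# LoRA adapter: rank, alpha, and target modules here are illustrative
# defaults, not hyperparameters taken from the paper.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the adapter weights are trainable
```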