@comment{
  Conference paper in the PROPOR 2026 proceedings (Vol. 2).
  Capitalised proper nouns in the title carry brace protection so that
  sentence-casing bibliography styles do not lowercase them.
}
@inproceedings{alexandre-alencar-2026-parsing,
    title = "Parsing {N}heengatu: Performance Gains for a {B}razilian {I}ndigenous {U}niversal {D}ependencies Treebank",
    author = "Alexandre, Dominick Maia and
      Alencar, Leonel Figueiredo de",
    editor = "Souza, Marlo and
      de-Dios-Flores, Iria and
      Santos, Diana and
      Freitas, Larissa and
      Souza, Jackson Wilke da Cruz and
      Ribeiro, Eug{\'e}nio",
    booktitle = "Proceedings of the 17th International Conference on Computational Processing of {P}ortuguese ({PROPOR} 2026) - Vol. 2",
    month = apr,
    year = "2026",
    address = "Salvador, Brazil",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2026.propor-2.29/",
    pages = "210--219",
    isbn = "979-8-89176-387-6",
    abstract = "This paper evaluates the impact of expanding the UD{\_}Nheengatu-CompLin treebank on parsing performance for Nheengatu, a Brazilian endangered Indigenous language. We hypothesized that the inclusion of annotated data would result in a 10{\%} improvement in the Labeled Attachment Score (LAS). To test this hypothesis, we conducted a 10-fold cross-validation experiment using UDPipe 1.4 under two conditions: parsing with gold tokenization and gold tags, and automatic parsing from raw text. Statistical significance was determined using the Mann-Whitney U test. Although the expected gain was not achieved, the results show improvements in parsing accuracy and reduced variance across folds. The findings highlight the importance of corpus expansion and standardized annotation workflows for improving parsing performance in low-resource language scenarios and for supporting reproducible evaluation methods in the computational modeling of minority languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- One MODS record describing a single paper; the proceedings volume it appears in is the "host" relatedItem below. -->
  <mods ID="alexandre-alencar-2026-parsing">
    <titleInfo>
      <title>Parsing Nheengatu: Performance Gains for a Brazilian Indigenous Universal Dependencies Treebank</title>
    </titleInfo>
    <!-- Paper authors, in byline order -->
    <name type="personal">
      <namePart type="given">Dominick</namePart>
      <namePart type="given">Maia</namePart>
      <namePart type="family">Alexandre</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Leonel</namePart>
      <namePart type="given">Figueiredo</namePart>
      <namePart type="given">de</namePart>
      <namePart type="family">Alencar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher, place and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 2</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Marlo</namePart>
        <namePart type="family">Souza</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Iria</namePart>
        <namePart type="family">de-Dios-Flores</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Diana</namePart>
        <namePart type="family">Santos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Larissa</namePart>
        <namePart type="family">Freitas</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jackson</namePart>
        <namePart type="given">Wilke</namePart>
        <namePart type="given">da</namePart>
        <namePart type="given">Cruz</namePart>
        <namePart type="family">Souza</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Eugénio</namePart>
        <namePart type="family">Ribeiro</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Salvador, Brazil</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-387-6</identifier>
    </relatedItem>
    <abstract>This paper evaluates the impact of expanding the UD_Nheengatu-CompLin treebank on parsing performance for Nheengatu, a Brazilian endangered Indigenous language. We hypothesized that the inclusion of annotated data would result in a 10% improvement in the Labeled Attachment Score (LAS). To test this hypothesis, we conducted a 10-fold cross-validation experiment using UDPipe 1.4 under two conditions: parsing with gold tokenization and gold tags, and automatic parsing from raw text. Statistical significance was determined using the Mann-Whitney U test. Although the expected gain was not achieved, the results show improvements in parsing accuracy and reduced variance across folds. The findings highlight the importance of corpus expansion and standardized annotation workflows for improving parsing performance in low-resource language scenarios and for supporting reproducible evaluation methods in the computational modeling of minority languages.</abstract>
    <identifier type="citekey">alexandre-alencar-2026-parsing</identifier>
    <location>
      <url>https://aclanthology.org/2026.propor-2.29/</url>
    </location>
    <!-- Position of the paper inside the host volume -->
    <part>
      <date>2026-04</date>
      <extent unit="page">
        <start>210</start>
        <end>219</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Parsing Nheengatu: Performance Gains for a Brazilian Indigenous Universal Dependencies Treebank
%A Alexandre, Dominick Maia
%A Alencar, Leonel Figueiredo de
%Y Souza, Marlo
%Y de-Dios-Flores, Iria
%Y Santos, Diana
%Y Freitas, Larissa
%Y Souza, Jackson Wilke da Cruz
%Y Ribeiro, Eugénio
%S Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 2
%D 2026
%8 April
%I Association for Computational Linguistics
%C Salvador, Brazil
%@ 979-8-89176-387-6
%F alexandre-alencar-2026-parsing
%X This paper evaluates the impact of expanding the UD_Nheengatu-CompLin treebank on parsing performance for Nheengatu, a Brazilian endangered Indigenous language. We hypothesized that the inclusion of annotated data would result in a 10% improvement in the Labeled Attachment Score (LAS). To test this hypothesis, we conducted a 10-fold cross-validation experiment using UDPipe 1.4 under two conditions: parsing with gold tokenization and gold tags, and automatic parsing from raw text. Statistical significance was determined using the Mann-Whitney U test. Although the expected gain was not achieved, the results show improvements in parsing accuracy and reduced variance across folds. The findings highlight the importance of corpus expansion and standardized annotation workflows for improving parsing performance in low-resource language scenarios and for supporting reproducible evaluation methods in the computational modeling of minority languages.
%U https://aclanthology.org/2026.propor-2.29/
%P 210-219
Markdown (Informal)
[Parsing Nheengatu: Performance Gains for a Brazilian Indigenous Universal Dependencies Treebank](https://aclanthology.org/2026.propor-2.29/) (Alexandre & Alencar, PROPOR 2026)
ACL