@comment{
  Conference paper in the PROPOR 2026 proceedings (Vol. 2), pages 170--180.
  Words that must keep their capitalisation under sentence-casing styles are
  protected with whole-word braces.
}
@inproceedings{craveiro-aluisio-2026-robustness,
  title     = {Robustness and Diversity Evaluation on {ProsSegue-ML}: a Free Prosodic Segmentation Tool for {Brazilian} {Portuguese}},
  author    = {Craveiro, Giovana Meloni and
               Alu{\'i}sio, Sandra Maria},
  editor    = {Souza, Marlo and
               de-Dios-Flores, Iria and
               Santos, Diana and
               Freitas, Larissa and
               Souza, Jackson Wilke da Cruz and
               Ribeiro, Eug{\'e}nio},
  booktitle = {Proceedings of the 17th International Conference on Computational Processing of {Portuguese} ({PROPOR} 2026) - Vol. 2},
  month     = apr,
  year      = {2026},
  address   = {Salvador, Brazil},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.propor-2.24/},
  pages     = {170--180},
  isbn      = {979-8-89176-387-6},
  abstract  = {Prosodic segmentation is the task of dividing a sound unit into smaller units, which can be distinguished between units with a completed idea, marked by TBs, and non-autonomous units, marked by NTBs. It is a useful task to enhance the performance of ASR and TTS systems, and it remains relevant for Brazilian Portuguese due to the diversity of conditions and speaker-related factors that influence its performance. Here, we explore a low-impact, open-source approach based on a Random Forest classifier and a set of features that include fundamental frequency, speech rate, pauses, and energy (Craveiro et al., 2025). We perform a robustness evaluation of the referred ML model, modifying a few conditions on its training, comparing its performance when tested in other datasets, and comparing its results with those of other studies using the same data samples. We experiment with augmenting the training dataset and evaluating how the bias of speaker profile aspects is affected when the size and diversity of the training set are changed. Although we don't achieve statistically significant values in the bias evaluation, we observe that inequalities grow as the training dataset is expanded with a much larger, but less diverse sample of data.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="craveiro-aluisio-2026-robustness">
<titleInfo>
<title>Robustness and Diversity Evaluation on ProsSegue-ML: a Free Prosodic Segmentation Tool for Brazilian Portuguese</title>
</titleInfo>
<name type="personal">
<namePart type="given">Giovana</namePart>
<namePart type="given">Meloni</namePart>
<namePart type="family">Craveiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sandra</namePart>
<namePart type="given">Maria</namePart>
<namePart type="family">Aluísio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 2</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marlo</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iria</namePart>
<namePart type="family">de-Dios-Flores</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diana</namePart>
<namePart type="family">Santos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Larissa</namePart>
<namePart type="family">Freitas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jackson</namePart>
<namePart type="given">Wilke</namePart>
<namePart type="given">da</namePart>
<namePart type="given">Cruz</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eugénio</namePart>
<namePart type="family">Ribeiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Salvador, Brazil</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-387-6</identifier>
</relatedItem>
<abstract>Prosodic segmentation is the task of dividing a sound unit into smaller units, which can be distinguished between units with a completed idea, marked by TBs, and non-autonomous units, marked by NTBs. It is a useful task to enhance the performance of ASR and TTS systems, and it remains relevant for Brazilian Portuguese due to the diversity of conditions and speaker-related factors that influence its performance. Here, we explore a low-impact, open-source approach based on a Random Forest classifier and a set of features that include fundamental frequency, speech rate, pauses, and energy (Craveiro et al., 2025). We perform a robustness evaluation of the referred ML model, modifying a few conditions on its training, comparing its performance when tested in other datasets, and comparing its results with those of other studies using the same data samples. We experiment with augmenting the training dataset and evaluating how the bias of speaker profile aspects is affected when the size and diversity of the training set are changed. Although we don’t achieve statistically significant values in the bias evaluation, we observe that inequalities grow as the training dataset is expanded with a much larger, but less diverse sample of data.</abstract>
<identifier type="citekey">craveiro-aluisio-2026-robustness</identifier>
<location>
<url>https://aclanthology.org/2026.propor-2.24/</url>
</location>
<part>
<date>2026-04</date>
<extent unit="page">
<start>170</start>
<end>180</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Robustness and Diversity Evaluation on ProsSegue-ML: a Free Prosodic Segmentation Tool for Brazilian Portuguese
%A Craveiro, Giovana Meloni
%A Aluísio, Sandra Maria
%Y Souza, Marlo
%Y de-Dios-Flores, Iria
%Y Santos, Diana
%Y Freitas, Larissa
%Y Souza, Jackson Wilke da Cruz
%Y Ribeiro, Eugénio
%S Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 2
%D 2026
%8 April
%I Association for Computational Linguistics
%C Salvador, Brazil
%@ 979-8-89176-387-6
%F craveiro-aluisio-2026-robustness
%X Prosodic segmentation is the task of dividing a sound unit into smaller units, which can be distinguished between units with a completed idea, marked by TBs, and non-autonomous units, marked by NTBs. It is a useful task to enhance the performance of ASR and TTS systems, and it remains relevant for Brazilian Portuguese due to the diversity of conditions and speaker-related factors that influence its performance. Here, we explore a low-impact, open-source approach based on a Random Forest classifier and a set of features that include fundamental frequency, speech rate, pauses, and energy (Craveiro et al., 2025). We perform a robustness evaluation of the referred ML model, modifying a few conditions on its training, comparing its performance when tested in other datasets, and comparing its results with those of other studies using the same data samples. We experiment with augmenting the training dataset and evaluating how the bias of speaker profile aspects is affected when the size and diversity of the training set are changed. Although we don’t achieve statistically significant values in the bias evaluation, we observe that inequalities grow as the training dataset is expanded with a much larger, but less diverse sample of data.
%U https://aclanthology.org/2026.propor-2.24/
%P 170-180
Markdown (Informal)
[Robustness and Diversity Evaluation on ProsSegue-ML: a Free Prosodic Segmentation Tool for Brazilian Portuguese](https://aclanthology.org/2026.propor-2.24/) (Craveiro & Aluísio, PROPOR 2026)
ACL