@inproceedings{fazzioni-etal-2026-biatron,
title = "Biatron: A Parameter-Efficient Small Language Model for {B}razilian {P}ortuguese with Integrated Mathematical Reasoning",
author = "Fazzioni, Daniel and
Almeida, Maria C. X. de and
Moreira, Anna P. V. L. B. and
Soares, Anderson S. and
Oliveira, S{\'a}vio S. T. de and
Federson, Fernando M.",
editor = "Souza, Marlo and
de-Dios-Flores, Iria and
Santos, Diana and
Freitas, Larissa and
Souza, Jackson Wilke da Cruz and
Ribeiro, Eug{\'e}nio",
booktitle = "Proceedings of the 17th International Conference on Computational Processing of {P}ortuguese ({PROPOR} 2026) - Vol. 1",
month = apr,
year = "2026",
address = "Salvador, Brazil",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.propor-1.86/",
pages = "868--877",
ISBN = "979-8-89176-387-6",
abstract = "The development of Small Language Models (SLMs) for Portuguese faces significant challenges in balancing parameter efficiency with specialized capabilities, particularly in mathematical reasoning domains where existing models demonstrate limited native competence. This work introduces the first model in the Biatron series, a 345-million-parameter language model specifically optimized for Brazilian Portuguese through strategic data curation rather than brute-force parameter scaling. Using a carefully designed 60-30-10 data mixture combining high-quality Portuguese text from GigaVerbo, chain-of-thought reasoning examples, and mathematical datasets, Biatron was trained on 300 billion tokens using the Megatron-LM framework, achieving 32{\%} Model FLOP Utilization.The model attains an overall score of 0.245 (aggregate performance) on Portuguese-specific benchmarks, approaching within 1.6{\%} of Tucano-630M{'}s performance while utilizing 45{\%} fewer parameters. Most significantly, Biatron achieves 7.5{\%} Pass@1 accuracy on mathematical reasoning tasks{---}more than doubling the performance of Tucano-2.4B (3.5{\%}) despite being nearly seven times smaller. These results validate that strategic data mixing can rival parameter scaling for language model development, establishing a reproducible methodology for efficient AI development in resource constrained language contexts. To support reproducibility and further research, the final model weights, training logs, and intermediate checkpoints are publicly available."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fazzioni-etal-2026-biatron">
<titleInfo>
<title>Biatron: A Parameter-Efficient Small Language Model for Brazilian Portuguese with Integrated Mathematical Reasoning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Fazzioni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">C</namePart>
<namePart type="given">X</namePart>
<namePart type="given">de</namePart>
<namePart type="family">Almeida</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="given">P</namePart>
<namePart type="given">V</namePart>
<namePart type="given">L</namePart>
<namePart type="given">B</namePart>
<namePart type="family">Moreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anderson</namePart>
<namePart type="given">S</namePart>
<namePart type="family">Soares</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sávio</namePart>
<namePart type="given">S</namePart>
<namePart type="given">T</namePart>
<namePart type="given">de</namePart>
<namePart type="family">Oliveira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fernando</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Federson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marlo</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iria</namePart>
<namePart type="family">de-Dios-Flores</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diana</namePart>
<namePart type="family">Santos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Larissa</namePart>
<namePart type="family">Freitas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jackson</namePart>
<namePart type="given">Wilke</namePart>
<namePart type="given">da</namePart>
<namePart type="given">Cruz</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eugénio</namePart>
<namePart type="family">Ribeiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Salvador, Brazil</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-387-6</identifier>
</relatedItem>
<abstract>The development of Small Language Models (SLMs) for Portuguese faces significant challenges in balancing parameter efficiency with specialized capabilities, particularly in mathematical reasoning domains where existing models demonstrate limited native competence. This work introduces the first model in the Biatron series, a 345-million-parameter language model specifically optimized for Brazilian Portuguese through strategic data curation rather than brute-force parameter scaling. Using a carefully designed 60-30-10 data mixture combining high-quality Portuguese text from GigaVerbo, chain-of-thought reasoning examples, and mathematical datasets, Biatron was trained on 300 billion tokens using the Megatron-LM framework, achieving 32% Model FLOP Utilization. The model attains an overall score of 0.245 (aggregate performance) on Portuguese-specific benchmarks, coming within 1.6% of Tucano-630M’s performance while utilizing 45% fewer parameters. Most significantly, Biatron achieves 7.5% Pass@1 accuracy on mathematical reasoning tasks—more than doubling the performance of Tucano-2.4B (3.5%) despite being nearly seven times smaller. These results validate that strategic data mixing can rival parameter scaling for language model development, establishing a reproducible methodology for efficient AI development in resource-constrained language contexts. To support reproducibility and further research, the final model weights, training logs, and intermediate checkpoints are publicly available.</abstract>
<identifier type="citekey">fazzioni-etal-2026-biatron</identifier>
<location>
<url>https://aclanthology.org/2026.propor-1.86/</url>
</location>
<part>
<date>2026-04</date>
<extent unit="page">
<start>868</start>
<end>877</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Biatron: A Parameter-Efficient Small Language Model for Brazilian Portuguese with Integrated Mathematical Reasoning
%A Fazzioni, Daniel
%A Almeida, Maria C. X. de
%A Moreira, Anna P. V. L. B.
%A Soares, Anderson S.
%A Oliveira, Sávio S. T. de
%A Federson, Fernando M.
%Y Souza, Marlo
%Y de-Dios-Flores, Iria
%Y Santos, Diana
%Y Freitas, Larissa
%Y Souza, Jackson Wilke da Cruz
%Y Ribeiro, Eugénio
%S Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1
%D 2026
%8 April
%I Association for Computational Linguistics
%C Salvador, Brazil
%@ 979-8-89176-387-6
%F fazzioni-etal-2026-biatron
%X The development of Small Language Models (SLMs) for Portuguese faces significant challenges in balancing parameter efficiency with specialized capabilities, particularly in mathematical reasoning domains where existing models demonstrate limited native competence. This work introduces the first model in the Biatron series, a 345-million-parameter language model specifically optimized for Brazilian Portuguese through strategic data curation rather than brute-force parameter scaling. Using a carefully designed 60-30-10 data mixture combining high-quality Portuguese text from GigaVerbo, chain-of-thought reasoning examples, and mathematical datasets, Biatron was trained on 300 billion tokens using the Megatron-LM framework, achieving 32% Model FLOP Utilization. The model attains an overall score of 0.245 (aggregate performance) on Portuguese-specific benchmarks, coming within 1.6% of Tucano-630M’s performance while utilizing 45% fewer parameters. Most significantly, Biatron achieves 7.5% Pass@1 accuracy on mathematical reasoning tasks—more than doubling the performance of Tucano-2.4B (3.5%) despite being nearly seven times smaller. These results validate that strategic data mixing can rival parameter scaling for language model development, establishing a reproducible methodology for efficient AI development in resource-constrained language contexts. To support reproducibility and further research, the final model weights, training logs, and intermediate checkpoints are publicly available.
%U https://aclanthology.org/2026.propor-1.86/
%P 868-877
Markdown (Informal)
[Biatron: A Parameter-Efficient Small Language Model for Brazilian Portuguese with Integrated Mathematical Reasoning](https://aclanthology.org/2026.propor-1.86/) (Fazzioni et al., PROPOR 2026)
ACL
Daniel Fazzioni, Maria C. X. de Almeida, Anna P. V. L. B. Moreira, Anderson S. Soares, Sávio S. T. de Oliveira, and Fernando M. Federson. 2026. Biatron: A Parameter-Efficient Small Language Model for Brazilian Portuguese with Integrated Mathematical Reasoning. In Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1, pages 868–877, Salvador, Brazil. Association for Computational Linguistics.