@inproceedings{carpi-finger-2026-flexqwen,
title = "{F}lex{Q}wen: Exploring Hybrid Objectives and Text Originality for {P}ortuguese",
author = "Carpi, Miguel de Mello and
Finger, Marcelo",
editor = "Souza, Marlo and
de-Dios-Flores, Iria and
Santos, Diana and
Freitas, Larissa and
Souza, Jackson Wilke da Cruz and
Ribeiro, Eug{\'e}nio",
booktitle = "Proceedings of the 17th International Conference on Computational Processing of {P}ortuguese ({PROPOR} 2026) - Vol. 1",
month = apr,
year = "2026",
address = "Salvador, Brazil",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.propor-1.114/",
pages = "1079--1084",
isbn = "979-8-89176-387-6",
abstract = "While scaling laws suggest increasing model and dataset sizes for better results, efficient pre-training techniques for low-resource scenarios present unique challenges that require further investigation. This work introduces FlexQwen, a model based on the Qwen 3 architecture adapted for a hybrid causal-masked objective, and the Carolina Originality dataset, a subset of the Corpus Carolina tailored for efficient pre-training in Portuguese. We investigate two primary research questions: the influence of hybrid masked-causal modelling and the impact of text originality on model performance. Our experiments compare a high-originality \textit{Gold} split against a length-matched control group. Results indicate that hybrid objectives may be viable for efficient training. Furthermore, we provide open access to our code, datasets, and training logs to foster further research in efficient Portuguese LLMs."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="carpi-finger-2026-flexqwen">
<titleInfo>
<title>FlexQwen: Exploring Hybrid Objectives and Text Originality for Portuguese</title>
</titleInfo>
<name type="personal">
<namePart type="given">Miguel</namePart>
<namePart type="given">de</namePart>
<namePart type="given">Mello</namePart>
<namePart type="family">Carpi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcelo</namePart>
<namePart type="family">Finger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marlo</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iria</namePart>
<namePart type="family">de-Dios-Flores</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diana</namePart>
<namePart type="family">Santos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Larissa</namePart>
<namePart type="family">Freitas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jackson</namePart>
<namePart type="given">Wilke</namePart>
<namePart type="given">da</namePart>
<namePart type="given">Cruz</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eugénio</namePart>
<namePart type="family">Ribeiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Salvador, Brazil</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-387-6</identifier>
</relatedItem>
<abstract>While scaling laws suggest increasing model and dataset sizes for better results, efficient pre-training techniques for low-resource scenarios present unique challenges that require further investigation. This work introduces FlexQwen, a model based on the Qwen 3 architecture adapted for a hybrid causal-masked objective, and the Carolina Originality dataset, a subset of the Corpus Carolina tailored for efficient pre-training in Portuguese. We investigate two primary research questions: the influence of hybrid masked-causal modelling and the impact of text originality on model performance. Our experiments compare a high-originality Gold split against a length-matched control group. Results indicate that hybrid objectives may be viable for efficient training. Furthermore, we provide open access to our code, datasets, and training logs to foster further research in efficient Portuguese LLMs.</abstract>
<identifier type="citekey">carpi-finger-2026-flexqwen</identifier>
<location>
<url>https://aclanthology.org/2026.propor-1.114/</url>
</location>
<part>
<date>2026-04</date>
<extent unit="page">
<start>1079</start>
<end>1084</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T FlexQwen: Exploring Hybrid Objectives and Text Originality for Portuguese
%A Carpi, Miguel de Mello
%A Finger, Marcelo
%Y Souza, Marlo
%Y de-Dios-Flores, Iria
%Y Santos, Diana
%Y Freitas, Larissa
%Y Souza, Jackson Wilke da Cruz
%Y Ribeiro, Eugénio
%S Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1
%D 2026
%8 April
%I Association for Computational Linguistics
%C Salvador, Brazil
%@ 979-8-89176-387-6
%F carpi-finger-2026-flexqwen
%X While scaling laws suggest increasing model and dataset sizes for better results, efficient pre-training techniques for low-resource scenarios present unique challenges that require further investigation. This work introduces FlexQwen, a model based on the Qwen 3 architecture adapted for a hybrid causal-masked objective, and the Carolina Originality dataset, a subset of the Corpus Carolina tailored for efficient pre-training in Portuguese. We investigate two primary research questions: the influence of hybrid masked-causal modelling and the impact of text originality on model performance. Our experiments compare a high-originality Gold split against a length-matched control group. Results indicate that hybrid objectives may be viable for efficient training. Furthermore, we provide open access to our code, datasets, and training logs to foster further research in efficient Portuguese LLMs.
%U https://aclanthology.org/2026.propor-1.114/
%P 1079-1084
Markdown (Informal)
[FlexQwen: Exploring Hybrid Objectives and Text Originality for Portuguese](https://aclanthology.org/2026.propor-1.114/) (Carpi & Finger, PROPOR 2026)
ACL