% Conference paper record (url points at aclanthology.org). Case-sensitive
% words in title/booktitle are protected with whole-word braces so that
% sentence-casing styles keep their capitalisation.
@inproceedings{melo-figueiredo-2026-gender,
  title     = {Gender Identification in {Brazilian} {Portuguese} Product Reviews: A Comparative Study of Classical Models, {BERT}, and {LLMs}},
  author    = {Melo, Tiago de and
               Figueiredo, Carlos M. S.},
  editor    = {Souza, Marlo and
               de-Dios-Flores, Iria and
               Santos, Diana and
               Freitas, Larissa and
               Souza, Jackson Wilke da Cruz and
               Ribeiro, Eug{\'e}nio},
  booktitle = {Proceedings of the 17th International Conference on Computational Processing of {Portuguese} ({PROPOR} 2026) - Vol. 1},
  month     = apr,
  year      = {2026},
  address   = {Salvador, Brazil},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.propor-1.2/},
  pages     = {11--19},
  isbn      = {979-8-89176-387-6},
  abstract  = {This study analyzes gender identification in Brazilian Portuguese using Amazon reviews drawn from ten product categories. Nine models were evaluated: three classical classifiers (Logistic Regression, Random Forest, and SVM), a multilingual BERT, and five LLMs (ChatGPT 4o, ChatGPT 3.5, DeepSeek, Sabia3, and Sabiazinho). Experiments show that BERT achieved the best performance (macro-F1 = 0.634), outperforming ChatGPT 4o and Logistic Regression by less than one percentage point. Reviews authored by women reach an average F1 of 0.654{---}four points higher than those by men. Performance also varies by domain: books and automotive are easier, whereas baby and pets are more challenging.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier further down. -->
  <mods ID="melo-figueiredo-2026-gender">
    <titleInfo>
      <title>Gender Identification in Brazilian Portuguese Product Reviews: A Comparative Study of Classical Models, BERT, and LLMs</title>
    </titleInfo>

    <!-- Authors of the paper (role: author). -->
    <name type="personal">
      <namePart type="given">Tiago</namePart>
      <namePart type="given">de</namePart>
      <namePart type="family">Melo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Carlos</namePart>
      <namePart type="given">M</namePart>
      <namePart type="given">S</namePart>
      <namePart type="family">Figueiredo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <originInfo>
      <dateIssued>2026-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Marlo</namePart>
        <namePart type="family">Souza</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Iria</namePart>
        <namePart type="family">de-Dios-Flores</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Diana</namePart>
        <namePart type="family">Santos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Larissa</namePart>
        <namePart type="family">Freitas</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jackson</namePart>
        <namePart type="given">Wilke</namePart>
        <namePart type="given">da</namePart>
        <namePart type="given">Cruz</namePart>
        <namePart type="family">Souza</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Eugénio</namePart>
        <namePart type="family">Ribeiro</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Salvador, Brazil</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-387-6</identifier>
    </relatedItem>

    <abstract>This study analyzes gender identification in Brazilian Portuguese using Amazon reviews drawn from ten product categories. Nine models were evaluated: three classical classifiers (Logistic Regression, Random Forest, and SVM), a multilingual BERT, and five LLMs (ChatGPT 4o, ChatGPT 3.5, DeepSeek, Sabia3, and Sabiazinho). Experiments show that BERT achieved the best performance (macro-F1 = 0.634), outperforming ChatGPT 4o and Logistic Regression by less than one percentage point. Reviews authored by women reach an average F1 of 0.654—four points higher than those by men. Performance also varies by domain: books and automotive are easier, whereas baby and pets are more challenging.</abstract>
    <identifier type="citekey">melo-figueiredo-2026-gender</identifier>
    <location>
      <url>https://aclanthology.org/2026.propor-1.2/</url>
    </location>

    <!-- Position of the paper within the host volume: issue date and page extent. -->
    <part>
      <date>2026-04</date>
      <extent unit="page">
        <start>11</start>
        <end>19</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Gender Identification in Brazilian Portuguese Product Reviews: A Comparative Study of Classical Models, BERT, and LLMs
%A Melo, Tiago de
%A Figueiredo, Carlos M. S.
%Y Souza, Marlo
%Y de-Dios-Flores, Iria
%Y Santos, Diana
%Y Freitas, Larissa
%Y Souza, Jackson Wilke da Cruz
%Y Ribeiro, Eugénio
%S Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1
%D 2026
%8 April
%I Association for Computational Linguistics
%C Salvador, Brazil
%@ 979-8-89176-387-6
%F melo-figueiredo-2026-gender
%X This study analyzes gender identification in Brazilian Portuguese using Amazon reviews drawn from ten product categories. Nine models were evaluated: three classical classifiers (Logistic Regression, Random Forest, and SVM), a multilingual BERT, and five LLMs (ChatGPT 4o, ChatGPT 3.5, DeepSeek, Sabia3, and Sabiazinho). Experiments show that BERT achieved the best performance (macro-F1 = 0.634), outperforming ChatGPT 4o and Logistic Regression by less than one percentage point. Reviews authored by women reach an average F1 of 0.654—four points higher than those by men. Performance also varies by domain: books and automotive are easier, whereas baby and pets are more challenging.
%U https://aclanthology.org/2026.propor-1.2/
%P 11-19
Markdown (Informal)
[Gender Identification in Brazilian Portuguese Product Reviews: A Comparative Study of Classical Models, BERT, and LLMs](https://aclanthology.org/2026.propor-1.2/) (Melo & Figueiredo, PROPOR 2026)
ACL