@inproceedings{brito-etal-2025-akcit,
title = "{AKCIT} at {S}em{E}val-2025 Task 11: Investigating Data Quality in {P}ortuguese Emotion Recognition",
author = {Brito, Iago A. and
F{\"a}rber, Fernanda B. and
Dollis, Julia S. and
Pedrozo, Daniel M. and
Novais, Artur M. A. and
Silva, Diogo F. C. and
Galv{\~a}o Filho, Arlindo R.},
editor = "Rosenthal, Sara and
Ros{\'a}, Aiala and
Ghosh, Debanjan and
Zampieri, Marcos",
booktitle = "Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.semeval-1.300/",
pages = "2305--2310",
ISBN = "979-8-89176-273-2",
abstract = "This paper investigates the impact of data quality and processing strategies on emotion recognition in Brazilian Portuguese (PTBR) texts. We focus on data distribution, linguistic context, and augmentation techniques such as translation and synthetic data generation. To evaluate these aspects, we conduct experiments on the PTBR portion of the BRIGHTER dataset, a manually curated multilingual dataset containing nearly 100,000 samples, of which 4,552 are in PTBR. Our study encompasses both multi-label emotion detection (presence/absence classification) and emotion intensity prediction (0 to 3 scale), following the SemEval 2025 Track 11 setup. Results demonstrate that emotion intensity labels enhance model performance after discretization, and that smaller multilingual models can outperform larger ones in low-resource settings. Our official submission ranked 6th, but further refinements improved our ranking to 3rd, trailing the top submission by only 0.047, reinforcing the significance of a data-centric approach in emotion recognition."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="brito-etal-2025-akcit">
<titleInfo>
<title>AKCIT at SemEval-2025 Task 11: Investigating Data Quality in Portuguese Emotion Recognition</title>
</titleInfo>
<name type="personal">
<namePart type="given">Iago</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Brito</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fernanda</namePart>
<namePart type="given">B</namePart>
<namePart type="family">Färber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="given">S</namePart>
<namePart type="family">Dollis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Pedrozo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Artur</namePart>
<namePart type="given">M</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Novais</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diogo</namePart>
<namePart type="given">F</namePart>
<namePart type="given">C</namePart>
<namePart type="family">Silva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arlindo</namePart>
<namePart type="given">R</namePart>
<namePart type="family">Galvão Filho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Rosenthal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aiala</namePart>
<namePart type="family">Rosá</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debanjan</namePart>
<namePart type="family">Ghosh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-273-2</identifier>
</relatedItem>
<abstract>This paper investigates the impact of data quality and processing strategies on emotion recognition in Brazilian Portuguese (PTBR) texts. We focus on data distribution, linguistic context, and augmentation techniques such as translation and synthetic data generation. To evaluate these aspects, we conduct experiments on the PTBR portion of the BRIGHTER dataset, a manually curated multilingual dataset containing nearly 100,000 samples, of which 4,552 are in PTBR. Our study encompasses both multi-label emotion detection (presence/absence classification) and emotion intensity prediction (0 to 3 scale), following the SemEval 2025 Track 11 setup. Results demonstrate that emotion intensity labels enhance model performance after discretization, and that smaller multilingual models can outperform larger ones in low-resource settings. Our official submission ranked 6th, but further refinements improved our ranking to 3rd, trailing the top submission by only 0.047, reinforcing the significance of a data-centric approach in emotion recognition.</abstract>
<identifier type="citekey">brito-etal-2025-akcit</identifier>
<location>
<url>https://aclanthology.org/2025.semeval-1.300/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>2305</start>
<end>2310</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T AKCIT at SemEval-2025 Task 11: Investigating Data Quality in Portuguese Emotion Recognition
%A Brito, Iago A.
%A Färber, Fernanda B.
%A Dollis, Julia S.
%A Pedrozo, Daniel M.
%A Novais, Artur M. A.
%A Silva, Diogo F. C.
%A Galvão Filho, Arlindo R.
%Y Rosenthal, Sara
%Y Rosá, Aiala
%Y Ghosh, Debanjan
%Y Zampieri, Marcos
%S Proceedings of the 19th International Workshop on Semantic Evaluation (SemEval-2025)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-273-2
%F brito-etal-2025-akcit
%X This paper investigates the impact of data quality and processing strategies on emotion recognition in Brazilian Portuguese (PTBR) texts. We focus on data distribution, linguistic context, and augmentation techniques such as translation and synthetic data generation. To evaluate these aspects, we conduct experiments on the PTBR portion of the BRIGHTER dataset, a manually curated multilingual dataset containing nearly 100,000 samples, of which 4,552 are in PTBR. Our study encompasses both multi-label emotion detection (presence/absence classification) and emotion intensity prediction (0 to 3 scale), following the SemEval 2025 Track 11 setup. Results demonstrate that emotion intensity labels enhance model performance after discretization, and that smaller multilingual models can outperform larger ones in low-resource settings. Our official submission ranked 6th, but further refinements improved our ranking to 3rd, trailing the top submission by only 0.047, reinforcing the significance of a data-centric approach in emotion recognition.
%U https://aclanthology.org/2025.semeval-1.300/
%P 2305-2310
Markdown (Informal)
[AKCIT at SemEval-2025 Task 11: Investigating Data Quality in Portuguese Emotion Recognition](https://aclanthology.org/2025.semeval-1.300/) (Brito et al., SemEval 2025)
ACL