% BibTeX record for the ConsumerBR paper (PROPOR 2026 proceedings, ACL Anthology).
@inproceedings{duarte-etal-2026-consumerbr,
  title     = {{ConsumerBR}: A Large-Scale Corpus of Consumer Complaints in {Brazilian} {Portuguese}},
  author    = {Duarte, Luis A. and
               Giacomin, Pedro and
               Bispo, Vit{\'o}ria and
               Silva, Mariana O. and
               Pereira, Adriano C. M. and
               Pappa, Gisele L.},
  editor    = {Souza, Marlo and
               de-Dios-Flores, Iria and
               Santos, Diana and
               Freitas, Larissa and
               Souza, Jackson Wilke da Cruz and
               Ribeiro, Eug{\'e}nio},
  booktitle = {Proceedings of the 17th International Conference on Computational Processing of {Portuguese} ({PROPOR} 2026) - Vol. 1},
  month     = apr,
  year      = {2026},
  address   = {Salvador, Brazil},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.propor-1.66/},
  pages     = {667--675},
  isbn      = {979-8-89176-387-6},
  abstract  = {We present ConsumerBR, a large-scale corpus of consumer complaints and company responses in Brazilian Portuguese, compiled from publicly available data on the Consumidor.gov.br platform. The corpus comprises over 3.1 million consumer{--}company interactions collected between 2021 and 2025 and combines anonymized textual content with rich structured metadata, including temporal information, complaint outcomes, and consumer satisfaction indicators. We describe a data collection strategy tailored to the platform{'}s dynamic interface, a preprocessing pipeline that includes response clustering to identify template-based replies, and a hybrid anonymization approach designed to mitigate privacy risks. We also provide a detailed statistical characterization of the corpus, highlighting its scale, coverage, and distributional properties. ConsumerBR is publicly available for research purposes and supports a wide range of applications, including complaint analysis, sentiment modeling, dialogue and response generation, and preference-based evaluation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the paper with citekey duarte-etal-2026-consumerbr. -->
  <mods ID="duarte-etal-2026-consumerbr">
    <titleInfo>
      <title>ConsumerBR: A Large-Scale Corpus of Consumer Complaints in Brazilian Portuguese</title>
    </titleInfo>

    <!-- Authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Luis</namePart>
      <namePart type="given">A</namePart>
      <namePart type="family">Duarte</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Pedro</namePart>
      <namePart type="family">Giacomin</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Vitória</namePart>
      <namePart type="family">Bispo</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Mariana</namePart>
      <namePart type="given">O</namePart>
      <namePart type="family">Silva</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Adriano</namePart>
      <namePart type="given">C</namePart>
      <namePart type="given">M</namePart>
      <namePart type="family">Pereira</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <name type="personal">
      <namePart type="given">Gisele</namePart>
      <namePart type="given">L</namePart>
      <namePart type="family">Pappa</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>

    <originInfo>
      <dateIssued>2026-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Marlo</namePart>
        <namePart type="family">Souza</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Iria</namePart>
        <namePart type="family">de-Dios-Flores</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Diana</namePart>
        <namePart type="family">Santos</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Larissa</namePart>
        <namePart type="family">Freitas</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Jackson</namePart>
        <namePart type="given">Wilke</namePart>
        <namePart type="given">da</namePart>
        <namePart type="given">Cruz</namePart>
        <namePart type="family">Souza</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <name type="personal">
        <namePart type="given">Eugénio</namePart>
        <namePart type="family">Ribeiro</namePart>
        <role><roleTerm authority="marcrelator" type="text">editor</roleTerm></role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Salvador, Brazil</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-387-6</identifier>
    </relatedItem>

    <abstract>We present ConsumerBR, a large-scale corpus of consumer complaints and company responses in Brazilian Portuguese, compiled from publicly available data on the Consumidor.gov.br platform. The corpus comprises over 3.1 million consumer–company interactions collected between 2021 and 2025 and combines anonymized textual content with rich structured metadata, including temporal information, complaint outcomes, and consumer satisfaction indicators. We describe a data collection strategy tailored to the platform’s dynamic interface, a preprocessing pipeline that includes response clustering to identify template-based replies, and a hybrid anonymization approach designed to mitigate privacy risks. We also provide a detailed statistical characterization of the corpus, highlighting its scale, coverage, and distributional properties. ConsumerBR is publicly available for research purposes and supports a wide range of applications, including complaint analysis, sentiment modeling, dialogue and response generation, and preference-based evaluation.</abstract>
    <identifier type="citekey">duarte-etal-2026-consumerbr</identifier>
    <location>
      <url>https://aclanthology.org/2026.propor-1.66/</url>
    </location>

    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2026-04</date>
      <extent unit="page">
        <start>667</start>
        <end>675</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T ConsumerBR: A Large-Scale Corpus of Consumer Complaints in Brazilian Portuguese
%A Duarte, Luis A.
%A Giacomin, Pedro
%A Bispo, Vitória
%A Silva, Mariana O.
%A Pereira, Adriano C. M.
%A Pappa, Gisele L.
%Y Souza, Marlo
%Y de-Dios-Flores, Iria
%Y Santos, Diana
%Y Freitas, Larissa
%Y Souza, Jackson Wilke da Cruz
%Y Ribeiro, Eugénio
%S Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1
%D 2026
%8 April
%I Association for Computational Linguistics
%C Salvador, Brazil
%@ 979-8-89176-387-6
%F duarte-etal-2026-consumerbr
%X We present ConsumerBR, a large-scale corpus of consumer complaints and company responses in Brazilian Portuguese, compiled from publicly available data on the Consumidor.gov.br platform. The corpus comprises over 3.1 million consumer–company interactions collected between 2021 and 2025 and combines anonymized textual content with rich structured metadata, including temporal information, complaint outcomes, and consumer satisfaction indicators. We describe a data collection strategy tailored to the platform’s dynamic interface, a preprocessing pipeline that includes response clustering to identify template-based replies, and a hybrid anonymization approach designed to mitigate privacy risks. We also provide a detailed statistical characterization of the corpus, highlighting its scale, coverage, and distributional properties. ConsumerBR is publicly available for research purposes and supports a wide range of applications, including complaint analysis, sentiment modeling, dialogue and response generation, and preference-based evaluation.
%U https://aclanthology.org/2026.propor-1.66/
%P 667-675
Markdown (Informal)
[ConsumerBR: A Large-Scale Corpus of Consumer Complaints in Brazilian Portuguese](https://aclanthology.org/2026.propor-1.66/) (Duarte et al., PROPOR 2026)
ACL