@inproceedings{marreira-etal-2026-rating,
title = "Rating{--}Text Mismatch in {B}razilian {P}ortuguese Reviews: How Reliable Are Zero-Shot {LLM}s?",
author = "Marreira, Emanuelle and
Figueiredo, Carlos M. S. and
Melo, Tiago de",
editor = "Souza, Marlo and
de-Dios-Flores, Iria and
Santos, Diana and
Freitas, Larissa and
Souza, Jackson Wilke da Cruz and
Ribeiro, Eug{\'e}nio",
booktitle = "Proceedings of the 17th International Conference on Computational Processing of {P}ortuguese ({PROPOR} 2026) - Vol. 1",
month = apr,
year = "2026",
address = "Salvador, Brazil",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.propor-1.96/",
pages = "959--967",
ISBN = "979-8-89176-387-6",
abstract = "This study evaluates the ability of large language models (LLMs) to detect incoherence between the text of product reviews and their assigned rating (1 or 5 stars). Using popular LLMs such as GPT-5, Llama-4 and DeepSeek-3.2, and models optimized for Brazilian Portuguese, Sabi{\'a}-3.1 and Bode-3.1, we show that some are capable of detecting incoherence among texts and ratings (F1 {\ensuremath{>}} 90{\%}) in a zero-shot protocol. Models also present a high agreement in the predictions, where several prediction rounds led to low variability (Fleiss' {\ensuremath{\kappa}}{\ensuremath{>}} 0.95). With the demonstrated incoherence present in all product categories (aprox. 10{\%} of comments), the results suggest that LLMs are very promising to perform this high semantic interpretation task, and they can be used as valuable tools for online monitoring and recommendation systems."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="marreira-etal-2026-rating">
<titleInfo>
<title>Rating–Text Mismatch in Brazilian Portuguese Reviews: How Reliable Are Zero-Shot LLMs?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Emanuelle</namePart>
<namePart type="family">Marreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carlos</namePart>
<namePart type="given">M</namePart>
<namePart type="given">S</namePart>
<namePart type="family">Figueiredo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tiago</namePart>
<namePart type="given">de</namePart>
<namePart type="family">Melo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marlo</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iria</namePart>
<namePart type="family">de-Dios-Flores</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diana</namePart>
<namePart type="family">Santos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Larissa</namePart>
<namePart type="family">Freitas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jackson</namePart>
<namePart type="given">Wilke</namePart>
<namePart type="given">da</namePart>
<namePart type="given">Cruz</namePart>
<namePart type="family">Souza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eugénio</namePart>
<namePart type="family">Ribeiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Salvador, Brazil</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-387-6</identifier>
</relatedItem>
<abstract>This study evaluates the ability of large language models (LLMs) to detect incoherence between the text of product reviews and their assigned rating (1 or 5 stars). Using popular LLMs such as GPT-5, Llama-4 and DeepSeek-3.2, and models optimized for Brazilian Portuguese, Sabiá-3.1 and Bode-3.1, we show that some are capable of detecting incoherence among texts and ratings (F1 \ensuremath> 90%) in a zero-shot protocol. Models also present a high agreement in the predictions, where several prediction rounds led to low variability (Fleiss’ \ensuremathąppa\ensuremath> 0.95). With the demonstrated incoherence present in all product categories (aprox. 10% of comments), the results suggest that LLMs are very promising to perform this high semantic interpretation task, and they can be used as valuable tools for online monitoring and recommendation systems.</abstract>
<identifier type="citekey">marreira-etal-2026-rating</identifier>
<location>
<url>https://aclanthology.org/2026.propor-1.96/</url>
</location>
<part>
<date>2026-04</date>
<extent unit="page">
<start>959</start>
<end>967</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Rating–Text Mismatch in Brazilian Portuguese Reviews: How Reliable Are Zero-Shot LLMs?
%A Marreira, Emanuelle
%A Figueiredo, Carlos M. S.
%A Melo, Tiago de
%Y Souza, Marlo
%Y de-Dios-Flores, Iria
%Y Santos, Diana
%Y Freitas, Larissa
%Y Souza, Jackson Wilke da Cruz
%Y Ribeiro, Eugénio
%S Proceedings of the 17th International Conference on Computational Processing of Portuguese (PROPOR 2026) - Vol. 1
%D 2026
%8 April
%I Association for Computational Linguistics
%C Salvador, Brazil
%@ 979-8-89176-387-6
%F marreira-etal-2026-rating
%X This study evaluates the ability of large language models (LLMs) to detect incoherence between the text of product reviews and their assigned rating (1 or 5 stars). Using popular LLMs such as GPT-5, Llama-4 and DeepSeek-3.2, and models optimized for Brazilian Portuguese, Sabiá-3.1 and Bode-3.1, we show that some are capable of detecting incoherence among texts and ratings (F1 \ensuremath> 90%) in a zero-shot protocol. Models also present a high agreement in the predictions, where several prediction rounds led to low variability (Fleiss’ \ensuremathąppa\ensuremath> 0.95). With the demonstrated incoherence present in all product categories (aprox. 10% of comments), the results suggest that LLMs are very promising to perform this high semantic interpretation task, and they can be used as valuable tools for online monitoring and recommendation systems.
%U https://aclanthology.org/2026.propor-1.96/
%P 959-967
Markdown (Informal)
[Rating–Text Mismatch in Brazilian Portuguese Reviews: How Reliable Are Zero-Shot LLMs?](https://aclanthology.org/2026.propor-1.96/) (Marreira et al., PROPOR 2026)
ACL