@inproceedings{barberia-etal-2025-say,
title = "It{'}s about What and How you say it: A Corpus with Stance and Sentiment Annotation for {COVID}-19 Vaccines Posts on {X}/{T}witter by {B}razilian Political Elites",
author = "Barberia, Lorena and
Schmalz, Pedro and
Trevisan Roman, Norton and
Lombard, Belinda and
Moraes de Sousa, Tatiane",
editor = {H{\"a}m{\"a}l{\"a}inen, Mika and
{\"O}hman, Emily and
Bizzoni, Yuri and
Miyagawa, So and
Alnajjar, Khalid},
booktitle = "Proceedings of the 5th International Conference on Natural Language Processing for Digital Humanities",
month = may,
year = "2025",
address = "Albuquerque, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.nlp4dh-1.32/",
doi = "10.18653/v1/2025.nlp4dh-1.32",
pages = "365--376",
ISBN = "979-8-89176-234-3",
abstract = "This paper details the development of a corpus with posts in Brazilian Portuguese published by Brazilian political elites on X (formerly Twitter) regarding COVID-19 vaccines. The corpus consists of 9,045 posts annotated for relevance, stance and sentiment towards COVID-19 vaccines and vaccination during the first three years of the COVID-19 pandemic (2020-2022).Nine annotators, working in three groups, classified relevance, stance, and sentiment in messages posted between 2020 and 2022 by local political elites. The annotators underwent extensive training, and weekly meetings were conducted to ensure intra-group annotation consistency. The analysis revealed fair to moderate inter-annotator agreement (Average Krippendorf{'}s alpha of 0.94 for relevance, 0,67 for sentiment and 0,70 for stance). This work makes four significant contributions to the literature. First, it addresses the scarcity of corpora in Brazilian Portuguese, particularly on COVID-19 or vaccines in general. Second, it provides a reliable annotation scheme for sentiment and stance classification, distinguishing both tasks, thereby improving classification precision. Third, it offers a corpus annotated with stance and sentiment according to this scheme, demonstrating how these tasks differ and how conflating them may lead to inconsistencies in corpus construction, as a results of confounding these phenomena {---} a recurring issue in NLP research beyond studies focusing on vaccines. And fourth, this annotated corpus may serve as the gold standard for fine-tuning and evaluating supervised machine learning models for relevance, sentiment and stance analysis of X posts on similar domains."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="barberia-etal-2025-say">
<titleInfo>
<title>It’s about What and How you say it: A Corpus with Stance and Sentiment Annotation for COVID-19 Vaccines Posts on X/Twitter by Brazilian Political Elites</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lorena</namePart>
<namePart type="family">Barberia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pedro</namePart>
<namePart type="family">Schmalz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Norton</namePart>
<namePart type="family">Trevisan Roman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Belinda</namePart>
<namePart type="family">Lombard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tatiane</namePart>
<namePart type="family">Moraes de Sousa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th International Conference on Natural Language Processing for Digital Humanities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="family">Öhman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuri</namePart>
<namePart type="family">Bizzoni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">So</namePart>
<namePart type="family">Miyagawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Alnajjar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-234-3</identifier>
</relatedItem>
<abstract>This paper details the development of a corpus with posts in Brazilian Portuguese published by Brazilian political elites on X (formerly Twitter) regarding COVID-19 vaccines. The corpus consists of 9,045 posts annotated for relevance, stance and sentiment towards COVID-19 vaccines and vaccination during the first three years of the COVID-19 pandemic (2020-2022).Nine annotators, working in three groups, classified relevance, stance, and sentiment in messages posted between 2020 and 2022 by local political elites. The annotators underwent extensive training, and weekly meetings were conducted to ensure intra-group annotation consistency. The analysis revealed fair to moderate inter-annotator agreement (Average Krippendorf’s alpha of 0.94 for relevance, 0,67 for sentiment and 0,70 for stance). This work makes four significant contributions to the literature. First, it addresses the scarcity of corpora in Brazilian Portuguese, particularly on COVID-19 or vaccines in general. Second, it provides a reliable annotation scheme for sentiment and stance classification, distinguishing both tasks, thereby improving classification precision. Third, it offers a corpus annotated with stance and sentiment according to this scheme, demonstrating how these tasks differ and how conflating them may lead to inconsistencies in corpus construction, as a results of confounding these phenomena — a recurring issue in NLP research beyond studies focusing on vaccines. And fourth, this annotated corpus may serve as the gold standard for fine-tuning and evaluating supervised machine learning models for relevance, sentiment and stance analysis of X posts on similar domains.</abstract>
<identifier type="citekey">barberia-etal-2025-say</identifier>
<identifier type="doi">10.18653/v1/2025.nlp4dh-1.32</identifier>
<location>
<url>https://aclanthology.org/2025.nlp4dh-1.32/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>365</start>
<end>376</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T It’s about What and How you say it: A Corpus with Stance and Sentiment Annotation for COVID-19 Vaccines Posts on X/Twitter by Brazilian Political Elites
%A Barberia, Lorena
%A Schmalz, Pedro
%A Trevisan Roman, Norton
%A Lombard, Belinda
%A Moraes de Sousa, Tatiane
%Y Hämäläinen, Mika
%Y Öhman, Emily
%Y Bizzoni, Yuri
%Y Miyagawa, So
%Y Alnajjar, Khalid
%S Proceedings of the 5th International Conference on Natural Language Processing for Digital Humanities
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, USA
%@ 979-8-89176-234-3
%F barberia-etal-2025-say
%X This paper details the development of a corpus with posts in Brazilian Portuguese published by Brazilian political elites on X (formerly Twitter) regarding COVID-19 vaccines. The corpus consists of 9,045 posts annotated for relevance, stance and sentiment towards COVID-19 vaccines and vaccination during the first three years of the COVID-19 pandemic (2020-2022).Nine annotators, working in three groups, classified relevance, stance, and sentiment in messages posted between 2020 and 2022 by local political elites. The annotators underwent extensive training, and weekly meetings were conducted to ensure intra-group annotation consistency. The analysis revealed fair to moderate inter-annotator agreement (Average Krippendorf’s alpha of 0.94 for relevance, 0,67 for sentiment and 0,70 for stance). This work makes four significant contributions to the literature. First, it addresses the scarcity of corpora in Brazilian Portuguese, particularly on COVID-19 or vaccines in general. Second, it provides a reliable annotation scheme for sentiment and stance classification, distinguishing both tasks, thereby improving classification precision. Third, it offers a corpus annotated with stance and sentiment according to this scheme, demonstrating how these tasks differ and how conflating them may lead to inconsistencies in corpus construction, as a results of confounding these phenomena — a recurring issue in NLP research beyond studies focusing on vaccines. And fourth, this annotated corpus may serve as the gold standard for fine-tuning and evaluating supervised machine learning models for relevance, sentiment and stance analysis of X posts on similar domains.
%R 10.18653/v1/2025.nlp4dh-1.32
%U https://aclanthology.org/2025.nlp4dh-1.32/
%U https://doi.org/10.18653/v1/2025.nlp4dh-1.32
%P 365-376
Markdown (Informal)
[It’s about What and How you say it: A Corpus with Stance and Sentiment Annotation for COVID-19 Vaccines Posts on X/Twitter by Brazilian Political Elites](https://aclanthology.org/2025.nlp4dh-1.32/) (Barberia et al., NLP4DH 2025)
ACL