@inproceedings{marini-jezek-2024-annotate,
title = "What to Annotate: Retrieving Lexical Markers of Conspiracy Discourse from an {I}talian-{E}nglish Corpus of Telegram Data",
author = "Marini, Costanza and
Jezek, Elisabetta",
editor = "Bunt, Harry and
Ide, Nancy and
Lee, Kiyong and
Petukhova, Volha and
Pustejovsky, James and
Romary, Laurent",
booktitle = "Proceedings of the 20th Joint ACL - ISO Workshop on Interoperable Semantic Annotation @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.isa-1.6",
pages = "47--52",
abstract = "In this age of social media, Conspiracy Theories (CTs) have become an issue that can no longer be ignored. After providing an overview of CT literature and corpus studies, we describe the creation of a 40,000-token English-Italian bilingual corpus of conspiracy-oriented Telegram comments {--} the Complotto corpus {--} and the linguistic analysis we performed using the Sketch Engine online platform (Kilgarriff et al., 2010) on our annotated data to identify statistically relevant linguistic markers of CT discourse. Thanks to the platform{'}s keywords and key terms extraction functions, we were able to assess the statistical significance of the following lexical and semantic phenomena, both cross-linguistically and cross-CT, namely: (1) evidentiality and epistemic modality markers; (2) debunking vocabulary referring to another version of the truth lying behind the official one; (3) the conceptual metaphor INSTITUTIONS ARE ABUSERS. All these features qualify as markers of CT discourse and have the potential to be effectively used for future semantic annotation tasks to develop automatic systems for CT identification.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="marini-jezek-2024-annotate">
<titleInfo>
<title>What to Annotate: Retrieving Lexical Markers of Conspiracy Discourse from an Italian-English Corpus of Telegram Data</title>
</titleInfo>
<name type="personal">
<namePart type="given">Costanza</namePart>
<namePart type="family">Marini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elisabetta</namePart>
<namePart type="family">Jezek</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 20th Joint ACL - ISO Workshop on Interoperable Semantic Annotation @ LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Harry</namePart>
<namePart type="family">Bunt</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nancy</namePart>
<namePart type="family">Ide</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kiyong</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Volha</namePart>
<namePart type="family">Petukhova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Pustejovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laurent</namePart>
<namePart type="family">Romary</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this age of social media, Conspiracy Theories (CTs) have become an issue that can no longer be ignored. After providing an overview of CT literature and corpus studies, we describe the creation of a 40,000-token English-Italian bilingual corpus of conspiracy-oriented Telegram comments – the Complotto corpus – and the linguistic analysis we performed using the Sketch Engine online platform (Kilgarriff et al., 2010) on our annotated data to identify statistically relevant linguistic markers of CT discourse. Thanks to the platform’s keywords and key terms extraction functions, we were able to assess the statistical significance of the following lexical and semantic phenomena, both cross-linguistically and cross-CT, namely: (1) evidentiality and epistemic modality markers; (2) debunking vocabulary referring to another version of the truth lying behind the official one; (3) the conceptual metaphor INSTITUTIONS ARE ABUSERS. All these features qualify as markers of CT discourse and have the potential to be effectively used for future semantic annotation tasks to develop automatic systems for CT identification.</abstract>
<identifier type="citekey">marini-jezek-2024-annotate</identifier>
<location>
<url>https://aclanthology.org/2024.isa-1.6</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>47</start>
<end>52</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T What to Annotate: Retrieving Lexical Markers of Conspiracy Discourse from an Italian-English Corpus of Telegram Data
%A Marini, Costanza
%A Jezek, Elisabetta
%Y Bunt, Harry
%Y Ide, Nancy
%Y Lee, Kiyong
%Y Petukhova, Volha
%Y Pustejovsky, James
%Y Romary, Laurent
%S Proceedings of the 20th Joint ACL - ISO Workshop on Interoperable Semantic Annotation @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F marini-jezek-2024-annotate
%X In this age of social media, Conspiracy Theories (CTs) have become an issue that can no longer be ignored. After providing an overview of CT literature and corpus studies, we describe the creation of a 40,000-token English-Italian bilingual corpus of conspiracy-oriented Telegram comments – the Complotto corpus – and the linguistic analysis we performed using the Sketch Engine online platform (Kilgarriff et al., 2010) on our annotated data to identify statistically relevant linguistic markers of CT discourse. Thanks to the platform’s keywords and key terms extraction functions, we were able to assess the statistical significance of the following lexical and semantic phenomena, both cross-linguistically and cross-CT, namely: (1) evidentiality and epistemic modality markers; (2) debunking vocabulary referring to another version of the truth lying behind the official one; (3) the conceptual metaphor INSTITUTIONS ARE ABUSERS. All these features qualify as markers of CT discourse and have the potential to be effectively used for future semantic annotation tasks to develop automatic systems for CT identification.
%U https://aclanthology.org/2024.isa-1.6
%P 47-52
Markdown (Informal)
[What to Annotate: Retrieving Lexical Markers of Conspiracy Discourse from an Italian-English Corpus of Telegram Data](https://aclanthology.org/2024.isa-1.6) (Marini & Jezek, ISA-WS 2024)
ACL