@inproceedings{simon-etal-2022-transcasm,
title = "{T}rans{C}asm: A Bilingual Corpus of Sarcastic Tweets",
author = "Simon, Desline and
Castilho, Sheila and
Lohar, Pintu and
Afli, Haithem",
editor = "Afli, Haithem and
Alam, Mehwish and
Bouamor, Houda and
Casagran, Cristina Blasi and
Boland, Colleen and
Ghannay, Sahar",
booktitle = "Proceedings of the LREC 2022 workshop on Natural Language Processing for Political Sciences",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.politicalnlp-1.14",
pages = "98--103",
abstract = "Sarcasm is extensively used in User Generated Content (UGC) in order to express one{'}s discontent, especially through blogs, forums, or social media such as Twitter. Several works have attempted to detect and analyse sarcasm in UGC. However, the lack of freely available corpora in this field makes the task even more difficult. In this work, we present {``}TransCasm{''} corpus, a parallel corpus of sarcastic tweets translated from English into French along with their non-sarcastic representations. To build the bilingual corpus of sarcasm, we select the {``}SIGN{''} corpus, a monolingual data set of sarcastic tweets and their non-sarcastic interpretations, created by (Peled and Reichart, 2017). We propose to define linguistic guidelines for developing {``}TransCasm{''} which is the first ever bilingual corpus of sarcastic tweets. In addition, we utilise {``}TransCasm{''} for building a binary sarcasm classifier in order to identify whether a tweet is sarcastic or not. Our experiment reveals that the sarcasm classifier achieves 61{\%} accuracy on detecting sarcasm in tweets. {``}TransCasm{''} is now freely available online and is ready to be explored for further research.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="simon-etal-2022-transcasm">
<titleInfo>
<title>TransCasm: A Bilingual Corpus of Sarcastic Tweets</title>
</titleInfo>
<name type="personal">
<namePart type="given">Desline</namePart>
<namePart type="family">Simon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sheila</namePart>
<namePart type="family">Castilho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pintu</namePart>
<namePart type="family">Lohar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Haithem</namePart>
<namePart type="family">Afli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the LREC 2022 workshop on Natural Language Processing for Political Sciences</title>
</titleInfo>
<name type="personal">
<namePart type="given">Haithem</namePart>
<namePart type="family">Afli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mehwish</namePart>
<namePart type="family">Alam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cristina</namePart>
<namePart type="given">Blasi</namePart>
<namePart type="family">Casagran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Colleen</namePart>
<namePart type="family">Boland</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sahar</namePart>
<namePart type="family">Ghannay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sarcasm is extensively used in User Generated Content (UGC) in order to express one’s discontent, especially through blogs, forums, or social media such as Twitter. Several works have attempted to detect and analyse sarcasm in UGC. However, the lack of freely available corpora in this field makes the task even more difficult. In this work, we present “TransCasm” corpus, a parallel corpus of sarcastic tweets translated from English into French along with their non-sarcastic representations. To build the bilingual corpus of sarcasm, we select the “SIGN” corpus, a monolingual data set of sarcastic tweets and their non-sarcastic interpretations, created by (Peled and Reichart, 2017). We propose to define linguistic guidelines for developing “TransCasm” which is the first ever bilingual corpus of sarcastic tweets. In addition, we utilise “TransCasm” for building a binary sarcasm classifier in order to identify whether a tweet is sarcastic or not. Our experiment reveals that the sarcasm classifier achieves 61% accuracy on detecting sarcasm in tweets. “TransCasm” is now freely available online and is ready to be explored for further research.</abstract>
<identifier type="citekey">simon-etal-2022-transcasm</identifier>
<location>
<url>https://aclanthology.org/2022.politicalnlp-1.14</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>98</start>
<end>103</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TransCasm: A Bilingual Corpus of Sarcastic Tweets
%A Simon, Desline
%A Castilho, Sheila
%A Lohar, Pintu
%A Afli, Haithem
%Y Afli, Haithem
%Y Alam, Mehwish
%Y Bouamor, Houda
%Y Casagran, Cristina Blasi
%Y Boland, Colleen
%Y Ghannay, Sahar
%S Proceedings of the LREC 2022 workshop on Natural Language Processing for Political Sciences
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F simon-etal-2022-transcasm
%X Sarcasm is extensively used in User Generated Content (UGC) in order to express one’s discontent, especially through blogs, forums, or social media such as Twitter. Several works have attempted to detect and analyse sarcasm in UGC. However, the lack of freely available corpora in this field makes the task even more difficult. In this work, we present “TransCasm” corpus, a parallel corpus of sarcastic tweets translated from English into French along with their non-sarcastic representations. To build the bilingual corpus of sarcasm, we select the “SIGN” corpus, a monolingual data set of sarcastic tweets and their non-sarcastic interpretations, created by (Peled and Reichart, 2017). We propose to define linguistic guidelines for developing “TransCasm” which is the first ever bilingual corpus of sarcastic tweets. In addition, we utilise “TransCasm” for building a binary sarcasm classifier in order to identify whether a tweet is sarcastic or not. Our experiment reveals that the sarcasm classifier achieves 61% accuracy on detecting sarcasm in tweets. “TransCasm” is now freely available online and is ready to be explored for further research.
%U https://aclanthology.org/2022.politicalnlp-1.14
%P 98-103
Markdown (Informal)
[TransCasm: A Bilingual Corpus of Sarcastic Tweets](https://aclanthology.org/2022.politicalnlp-1.14) (Simon et al., PoliticalNLP 2022)
ACL
- Desline Simon, Sheila Castilho, Pintu Lohar, and Haithem Afli. 2022. TransCasm: A Bilingual Corpus of Sarcastic Tweets. In Proceedings of the LREC 2022 workshop on Natural Language Processing for Political Sciences, pages 98–103, Marseille, France. European Language Resources Association.