% SarcNet paper in the LREC-COLING 2024 proceedings (conference paper entry).
% The coined name SarcNet is brace-protected as a whole word so that
% sentence-casing styles keep its capitals without breaking kerning.
@inproceedings{yue-etal-2024-sarcnet,
  title     = {{SarcNet}: A Multilingual Multimodal Sarcasm Detection Dataset},
  author    = {Yue, Tan and
               Shi, Xuzhao and
               Mao, Rui and
               Hu, Zonghai and
               Cambria, Erik},
  editor    = {Calzolari, Nicoletta and
               Kan, Min-Yen and
               Hoste, Veronique and
               Lenci, Alessandro and
               Sakti, Sakriani and
               Xue, Nianwen},
  booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation ({LREC-COLING} 2024)},
  month     = may,
  year      = {2024},
  address   = {Torino, Italia},
  publisher = {ELRA and ICCL},
  url       = {https://aclanthology.org/2024.lrec-main.1248},
  pages     = {14325--14335},
  abstract  = {Sarcasm poses a challenge in linguistic analysis due to its implicit nature, involving an intended meaning that contradicts the literal expression. The advent of social networks has propelled the utilization of multimodal data to enhance sarcasm detection performance. In prior multimodal sarcasm detection datasets, a single label is assigned to a multimodal instance. Subsequent experiments often highlight the superiority of multimodal models by demonstrating their improvements compared to unimodal models based on these unified labels across multiple modalities. However, our investigation revealed that numerous instances of sarcasm cannot be identified using a single modality. Humans employ the conflict between a statement and factual information as a cue to detect sarcasm, and these cues can stem from different modalities. Then, a unified label for a multimodal instance may be not suitable for the associated text or image. In this work, we introduce SarcNet, a multilingual and multimodal sarcasm detection dataset in English and Chinese, consisting of 3,335 image-text pair samples. We provide annotations for sarcasm in visual, textual, and multimodal data, respectively, resulting in over 10,000 labeled instances. The separated annotation schema for unimodal and multimodal data facilitates a more accurate and reasonable assessment of unimodal and multimodal models.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS export of the citation record with citekey yue-etal-2024-sarcnet. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="yue-etal-2024-sarcnet">
    <titleInfo>
      <title>SarcNet: A Multilingual Multimodal Sarcasm Detection Dataset</title>
    </titleInfo>
    <!-- Authors of the paper. -->
    <name type="personal">
      <namePart type="given">Tan</namePart>
      <namePart type="family">Yue</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Xuzhao</namePart>
      <namePart type="family">Shi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Rui</namePart>
      <namePart type="family">Mao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Zonghai</namePart>
      <namePart type="family">Hu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Erik</namePart>
      <namePart type="family">Cambria</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors and its publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Min-Yen</namePart>
        <namePart type="family">Kan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Veronique</namePart>
        <namePart type="family">Hoste</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alessandro</namePart>
        <namePart type="family">Lenci</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sakriani</namePart>
        <namePart type="family">Sakti</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nianwen</namePart>
        <namePart type="family">Xue</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>ELRA and ICCL</publisher>
        <place>
          <placeTerm type="text">Torino, Italia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Sarcasm poses a challenge in linguistic analysis due to its implicit nature, involving an intended meaning that contradicts the literal expression. The advent of social networks has propelled the utilization of multimodal data to enhance sarcasm detection performance. In prior multimodal sarcasm detection datasets, a single label is assigned to a multimodal instance. Subsequent experiments often highlight the superiority of multimodal models by demonstrating their improvements compared to unimodal models based on these unified labels across multiple modalities. However, our investigation revealed that numerous instances of sarcasm cannot be identified using a single modality. Humans employ the conflict between a statement and factual information as a cue to detect sarcasm, and these cues can stem from different modalities. Then, a unified label for a multimodal instance may be not suitable for the associated text or image. In this work, we introduce SarcNet, a multilingual and multimodal sarcasm detection dataset in English and Chinese, consisting of 3,335 image-text pair samples. We provide annotations for sarcasm in visual, textual, and multimodal data, respectively, resulting in over 10,000 labeled instances. The separated annotation schema for unimodal and multimodal data facilitates a more accurate and reasonable assessment of unimodal and multimodal models.</abstract>
    <identifier type="citekey">yue-etal-2024-sarcnet</identifier>
    <location>
      <url>https://aclanthology.org/2024.lrec-main.1248</url>
    </location>
    <!-- Issue date and page range of the paper within the host volume. -->
    <part>
      <date>2024-05</date>
      <extent unit="page">
        <start>14325</start>
        <end>14335</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T SarcNet: A Multilingual Multimodal Sarcasm Detection Dataset
%A Yue, Tan
%A Shi, Xuzhao
%A Mao, Rui
%A Hu, Zonghai
%A Cambria, Erik
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F yue-etal-2024-sarcnet
%X Sarcasm poses a challenge in linguistic analysis due to its implicit nature, involving an intended meaning that contradicts the literal expression. The advent of social networks has propelled the utilization of multimodal data to enhance sarcasm detection performance. In prior multimodal sarcasm detection datasets, a single label is assigned to a multimodal instance. Subsequent experiments often highlight the superiority of multimodal models by demonstrating their improvements compared to unimodal models based on these unified labels across multiple modalities. However, our investigation revealed that numerous instances of sarcasm cannot be identified using a single modality. Humans employ the conflict between a statement and factual information as a cue to detect sarcasm, and these cues can stem from different modalities. Then, a unified label for a multimodal instance may be not suitable for the associated text or image. In this work, we introduce SarcNet, a multilingual and multimodal sarcasm detection dataset in English and Chinese, consisting of 3,335 image-text pair samples. We provide annotations for sarcasm in visual, textual, and multimodal data, respectively, resulting in over 10,000 labeled instances. The separated annotation schema for unimodal and multimodal data facilitates a more accurate and reasonable assessment of unimodal and multimodal models.
%U https://aclanthology.org/2024.lrec-main.1248
%P 14325-14335
Markdown (Informal)
[SarcNet: A Multilingual Multimodal Sarcasm Detection Dataset](https://aclanthology.org/2024.lrec-main.1248) (Yue et al., LREC-COLING 2024)
ACL
- Tan Yue, Xuzhao Shi, Rui Mao, Zonghai Hu, and Erik Cambria. 2024. SarcNet: A Multilingual Multimodal Sarcasm Detection Dataset. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 14325–14335, Torino, Italia. ELRA and ICCL.