@inproceedings{alnajjar-hamalainen-2021-que,
  title     = {{\textexclamdown}{Qu{\'e}} maravilla! Multimodal Sarcasm Detection in {Spanish}: a Dataset and a Baseline},
  author    = {Alnajjar, Khalid and
               H{\"a}m{\"a}l{\"a}inen, Mika},
  editor    = {Zadeh, Amir and
               Morency, Louis-Philippe and
               Liang, Paul Pu and
               Ross, Candace and
               Salakhutdinov, Ruslan and
               Poria, Soujanya and
               Cambria, Erik and
               Shi, Kelly},
  booktitle = {Proceedings of the Third Workshop on Multimodal Artificial Intelligence},
  month     = jun,
  year      = {2021},
  address   = {Mexico City, Mexico},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.maiworkshop-1.9},
  doi       = {10.18653/v1/2021.maiworkshop-1.9},
  pages     = {63--68},
  abstract  = {We construct the first ever multimodal sarcasm dataset for Spanish. The audiovisual dataset consists of sarcasm annotated text that is aligned with video and audio. The dataset represents two varieties of Spanish, a Latin American variety and a Peninsular Spanish variety, which ensures a wider dialectal coverage for this global language. We present several models for sarcasm detection that will serve as baselines in the future research. Our results show that results with text only (89{\%}) are worse than when combining text with audio (91.9{\%}). Finally, the best results are obtained when combining all the modalities: text, audio and video (93.1{\%}). Our dataset will be published on Zenodo with access granted by request.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alnajjar-hamalainen-2021-que">
<titleInfo>
<title>¡Qué maravilla! Multimodal Sarcasm Detection in Spanish: a Dataset and a Baseline</title>
</titleInfo>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Alnajjar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mika</namePart>
<namePart type="family">Hämäläinen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Multimodal Artificial Intelligence</title>
</titleInfo>
<name type="personal">
<namePart type="given">Amir</namePart>
<namePart type="family">Zadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Louis-Philippe</namePart>
<namePart type="family">Morency</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="given">Pu</namePart>
<namePart type="family">Liang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Candace</namePart>
<namePart type="family">Ross</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Salakhutdinov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Soujanya</namePart>
<namePart type="family">Poria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Erik</namePart>
<namePart type="family">Cambria</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kelly</namePart>
<namePart type="family">Shi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We construct the first ever multimodal sarcasm dataset for Spanish. The audiovisual dataset consists of sarcasm annotated text that is aligned with video and audio. The dataset represents two varieties of Spanish, a Latin American variety and a Peninsular Spanish variety, which ensures a wider dialectal coverage for this global language. We present several models for sarcasm detection that will serve as baselines in the future research. Our results show that results with text only (89%) are worse than when combining text with audio (91.9%). Finally, the best results are obtained when combining all the modalities: text, audio and video (93.1%). Our dataset will be published on Zenodo with access granted by request.</abstract>
<identifier type="citekey">alnajjar-hamalainen-2021-que</identifier>
<identifier type="doi">10.18653/v1/2021.maiworkshop-1.9</identifier>
<location>
<url>https://aclanthology.org/2021.maiworkshop-1.9</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>63</start>
<end>68</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ¡Qué maravilla! Multimodal Sarcasm Detection in Spanish: a Dataset and a Baseline
%A Alnajjar, Khalid
%A Hämäläinen, Mika
%Y Zadeh, Amir
%Y Morency, Louis-Philippe
%Y Liang, Paul Pu
%Y Ross, Candace
%Y Salakhutdinov, Ruslan
%Y Poria, Soujanya
%Y Cambria, Erik
%Y Shi, Kelly
%S Proceedings of the Third Workshop on Multimodal Artificial Intelligence
%D 2021
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F alnajjar-hamalainen-2021-que
%X We construct the first ever multimodal sarcasm dataset for Spanish. The audiovisual dataset consists of sarcasm annotated text that is aligned with video and audio. The dataset represents two varieties of Spanish, a Latin American variety and a Peninsular Spanish variety, which ensures a wider dialectal coverage for this global language. We present several models for sarcasm detection that will serve as baselines in the future research. Our results show that results with text only (89%) are worse than when combining text with audio (91.9%). Finally, the best results are obtained when combining all the modalities: text, audio and video (93.1%). Our dataset will be published on Zenodo with access granted by request.
%R 10.18653/v1/2021.maiworkshop-1.9
%U https://aclanthology.org/2021.maiworkshop-1.9
%U https://doi.org/10.18653/v1/2021.maiworkshop-1.9
%P 63-68
Markdown (Informal)
[¡Qué maravilla! Multimodal Sarcasm Detection in Spanish: a Dataset and a Baseline](https://aclanthology.org/2021.maiworkshop-1.9) (Alnajjar & Hämäläinen, maiworkshop 2021)
ACL