@inproceedings{mazumder-etal-2025-revealing,
title = "Revealing the impact of synthetic native samples and multi-tasking strategies in {H}indi-{E}nglish code-mixed humour and sarcasm detection",
author = "Mazumder, Debajyoti and
Kumar, Aakash and
Patro, Jasabanta",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2025",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.findings-emnlp.1308/",
pages = "24077--24107",
ISBN = "979-8-89176-335-7",
abstract = "In this paper, we reported our experiments with various strategies to improve code-mixed humour and sarcasm detection. Particularly, we tried three approaches: (i) native sample mixing, (ii) multi-task learning (MTL), and (iii) prompting and instruction finetuning very large multilingual language models (VMLMs). In native sample mixing, we added monolingual task samples to code-mixed training sets. In MTL learning, we relied on native and code-mixed samples of a semantically related task (hate detection in our case). Finally, in our third approach, we evaluated the efficacy of VMLMs via few-shot context prompting and instruction finetuning. Some interesting findings we got are (i) adding native samples improved humor (raising the F1-score up to 6.76{\%}) and sarcasm (raising the F1-score up to 8.64{\%}) detection, (ii) training MLMs in an MTL framework boosted performance for both humour (raising the F1-score up to 10.67{\%}) and sarcasm (increment up to 12.35{\%} in F1-score) detection, and (iii) prompting and instruction finetuning VMLMs couldn{'}t outperform the other approaches. Finally, our ablation studies and error analysis discovered the cases where our model is yet to improve. We provided our code for reproducibility."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mazumder-etal-2025-revealing">
<titleInfo>
<title>Revealing the impact of synthetic native samples and multi-tasking strategies in Hindi-English code-mixed humour and sarcasm detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Debajyoti</namePart>
<namePart type="family">Mazumder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aakash</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jasabanta</namePart>
<namePart type="family">Patro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2025</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-335-7</identifier>
</relatedItem>
<abstract>In this paper, we report our experiments with various strategies to improve code-mixed humour and sarcasm detection. In particular, we tried three approaches: (i) native sample mixing, (ii) multi-task learning (MTL), and (iii) prompting and instruction finetuning of very large multilingual language models (VMLMs). In native sample mixing, we added monolingual task samples to the code-mixed training sets. In MTL, we relied on native and code-mixed samples of a semantically related task (hate detection in our case). In the third approach, we evaluated the efficacy of VMLMs via few-shot in-context prompting and instruction finetuning. Our key findings are: (i) adding native samples improved humour (raising the F1-score by up to 6.76%) and sarcasm (raising the F1-score by up to 8.64%) detection; (ii) training MLMs in an MTL framework boosted performance for both humour (raising the F1-score by up to 10.67%) and sarcasm (an increment of up to 12.35% in F1-score) detection; and (iii) prompting and instruction finetuning VMLMs could not outperform the other approaches. Finally, our ablation studies and error analysis identified the cases where our models are yet to improve. We provide our code for reproducibility.</abstract>
<identifier type="citekey">mazumder-etal-2025-revealing</identifier>
<location>
<url>https://aclanthology.org/2025.findings-emnlp.1308/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>24077</start>
<end>24107</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Revealing the impact of synthetic native samples and multi-tasking strategies in Hindi-English code-mixed humour and sarcasm detection
%A Mazumder, Debajyoti
%A Kumar, Aakash
%A Patro, Jasabanta
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Findings of the Association for Computational Linguistics: EMNLP 2025
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-335-7
%F mazumder-etal-2025-revealing
%X In this paper, we report our experiments with various strategies to improve code-mixed humour and sarcasm detection. In particular, we tried three approaches: (i) native sample mixing, (ii) multi-task learning (MTL), and (iii) prompting and instruction finetuning of very large multilingual language models (VMLMs). In native sample mixing, we added monolingual task samples to the code-mixed training sets. In MTL, we relied on native and code-mixed samples of a semantically related task (hate detection in our case). In the third approach, we evaluated the efficacy of VMLMs via few-shot in-context prompting and instruction finetuning. Our key findings are: (i) adding native samples improved humour (raising the F1-score by up to 6.76%) and sarcasm (raising the F1-score by up to 8.64%) detection; (ii) training MLMs in an MTL framework boosted performance for both humour (raising the F1-score by up to 10.67%) and sarcasm (an increment of up to 12.35% in F1-score) detection; and (iii) prompting and instruction finetuning VMLMs could not outperform the other approaches. Finally, our ablation studies and error analysis identified the cases where our models are yet to improve. We provide our code for reproducibility.
%U https://aclanthology.org/2025.findings-emnlp.1308/
%P 24077-24107
Markdown (Informal)
[Revealing the impact of synthetic native samples and multi-tasking strategies in Hindi-English code-mixed humour and sarcasm detection](https://aclanthology.org/2025.findings-emnlp.1308/) (Mazumder et al., Findings 2025)
ACL
Debajyoti Mazumder, Aakash Kumar, and Jasabanta Patro. 2025. Revealing the impact of synthetic native samples and multi-tasking strategies in Hindi-English code-mixed humour and sarcasm detection. In Findings of the Association for Computational Linguistics: EMNLP 2025, pages 24077–24107, Suzhou, China. Association for Computational Linguistics.
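
As a quick illustration of the paper's first strategy, below is a minimal Python sketch of native sample mixing as described in the abstract: monolingual (native) task samples are appended to a code-mixed training set before fine-tuning. The function name, mixing ratio, field layout, and toy data are hypothetical assumptions made here for illustration; see the authors' released code (linked above) for their actual setup.

import random
from typing import List, Tuple

# (text, label) pairs, e.g. for binary humour or sarcasm detection.
Sample = Tuple[str, int]

def mix_native_samples(
    code_mixed: List[Sample],
    native: List[Sample],
    ratio: float = 0.5,
    seed: int = 42,
) -> List[Sample]:
    """Append up to ratio * len(code_mixed) monolingual samples to the
    code-mixed training data and return the shuffled combined set.
    The ratio value is an illustrative assumption, not the paper's."""
    rng = random.Random(seed)
    n_extra = int(len(code_mixed) * ratio)
    extra = rng.sample(native, min(n_extra, len(native)))
    mixed = code_mixed + extra
    rng.shuffle(mixed)
    return mixed

# Toy usage: mix English (native) samples into Hindi-English code-mixed data.
cm_train = [("kya mast joke hai yaar", 1), ("aaj office jana hai", 0)]
en_train = [("what a hilarious pun", 1), ("the meeting starts at nine", 0)]
train_set = mix_native_samples(cm_train, en_train, ratio=1.0)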