@inproceedings{sonu-etal-2022-identifying,
title = "Identifying Emotions in Code Mixed {H}indi-{E}nglish Tweets",
author = "Sonu, Sanket and
Haque, Rejwanul and
Hasanuzzaman, Mohammed and
Stynes, Paul and
Pathak, Pramod",
editor = "Jha, Girish Nath and
L., Sobha and
Bali, Kalika and
Ojha, Atul Kr.",
booktitle = "Proceedings of the WILDRE-6 Workshop within the 13th Language Resources and Evaluation Conference",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.wildre-1.7",
pages = "35--41",
abstract = "Emotion detection (ED) in tweets is a text classification problem that is of interest to Natural Language Processing (NLP) researchers. Code-mixing (CM) is a process of mixing linguistic units such as words of two different languages. The CM languages are characteristically different from the languages whose linguistic units are used for mixing. Whilst NLP has been shown to be successful for low-resource languages, it becomes challenging to perform NLP tasks on CM languages. As for ED, it has been rarely investigated on CM languages such as Hindi{---}English due to the lack of training data that is required for today{'}s data-driven classification algorithms. This research proposes a gold standard dataset for detecting emotions in CM Hindi{--}English tweets. This paper also presents our results about the investigation of the usefulness of our gold-standard dataset while testing a number of state-of-the-art classification algorithms. We found that the ED classifier built using SVM provided us the highest accuracy (75.17{\%}) on the hold-out test set. This research would benefit the NLP community in detecting emotions from social media platforms in multilingual societies.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sonu-etal-2022-identifying">
<titleInfo>
<title>Identifying Emotions in Code Mixed Hindi-English Tweets</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sanket</namePart>
<namePart type="family">Sonu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rejwanul</namePart>
<namePart type="family">Haque</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammed</namePart>
<namePart type="family">Hasanuzzaman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">Stynes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pramod</namePart>
<namePart type="family">Pathak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the WILDRE-6 Workshop within the 13th Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Girish</namePart>
<namePart type="given">Nath</namePart>
<namePart type="family">Jha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sobha</namePart>
<namePart type="family">L.</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kalika</namePart>
<namePart type="family">Bali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Atul</namePart>
<namePart type="given">Kr.</namePart>
<namePart type="family">Ojha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Emotion detection (ED) in tweets is a text classification problem that is of interest to Natural Language Processing (NLP) researchers. Code-mixing (CM) is a process of mixing linguistic units such as words of two different languages. The CM languages are characteristically different from the languages whose linguistic units are used for mixing. Whilst NLP has been shown to be successful for low-resource languages, it becomes challenging to perform NLP tasks on CM languages. As for ED, it has been rarely investigated on CM languages such as Hindi—English due to the lack of training data that is required for today’s data-driven classification algorithms. This research proposes a gold standard dataset for detecting emotions in CM Hindi–English tweets. This paper also presents our results about the investigation of the usefulness of our gold-standard dataset while testing a number of state-of-the-art classification algorithms. We found that the ED classifier built using SVM provided us the highest accuracy (75.17%) on the hold-out test set. This research would benefit the NLP community in detecting emotions from social media platforms in multilingual societies.</abstract>
<identifier type="citekey">sonu-etal-2022-identifying</identifier>
<location>
<url>https://aclanthology.org/2022.wildre-1.7</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>35</start>
<end>41</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Identifying Emotions in Code Mixed Hindi-English Tweets
%A Sonu, Sanket
%A Haque, Rejwanul
%A Hasanuzzaman, Mohammed
%A Stynes, Paul
%A Pathak, Pramod
%Y Jha, Girish Nath
%Y L., Sobha
%Y Bali, Kalika
%Y Ojha, Atul Kr.
%S Proceedings of the WILDRE-6 Workshop within the 13th Language Resources and Evaluation Conference
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F sonu-etal-2022-identifying
%X Emotion detection (ED) in tweets is a text classification problem that is of interest to Natural Language Processing (NLP) researchers. Code-mixing (CM) is a process of mixing linguistic units such as words of two different languages. The CM languages are characteristically different from the languages whose linguistic units are used for mixing. Whilst NLP has been shown to be successful for low-resource languages, it becomes challenging to perform NLP tasks on CM languages. As for ED, it has been rarely investigated on CM languages such as Hindi—English due to the lack of training data that is required for today’s data-driven classification algorithms. This research proposes a gold standard dataset for detecting emotions in CM Hindi–English tweets. This paper also presents our results about the investigation of the usefulness of our gold-standard dataset while testing a number of state-of-the-art classification algorithms. We found that the ED classifier built using SVM provided us the highest accuracy (75.17%) on the hold-out test set. This research would benefit the NLP community in detecting emotions from social media platforms in multilingual societies.
%U https://aclanthology.org/2022.wildre-1.7
%P 35-41
Markdown (Informal)
[Identifying Emotions in Code Mixed Hindi-English Tweets](https://aclanthology.org/2022.wildre-1.7) (Sonu et al., WILDRE 2022)
ACL
- Sanket Sonu, Rejwanul Haque, Mohammed Hasanuzzaman, Paul Stynes, and Pramod Pathak. 2022. Identifying Emotions in Code Mixed Hindi-English Tweets. In Proceedings of the WILDRE-6 Workshop within the 13th Language Resources and Evaluation Conference, pages 35–41, Marseille, France. European Language Resources Association.