@inproceedings{singh-goyal-2020-sentiment,
title = "Sentiment Analysis of {E}nglish-{P}unjabi Code-Mixed Social Media Content",
author = "Singh, Mukhtiar and
Goyal, Vishal",
editor = "Goyal, Vishal and
Ekbal, Asif",
booktitle = "Proceedings of the 17th International Conference on Natural Language Processing (ICON): System Demonstrations",
month = dec,
year = "2020",
address = "Patna, India",
publisher = "NLP Association of India (NLPAI)",
url = "https://aclanthology.org/2020.icon-demos.9",
pages = "24--25",
abstract = "Sentiment analysis is a field of study for analyzing people{'}s emotions, such as Nice, Happy, ਦੁਖੀ (sad), changa (Good), etc. towards the entities and attributes expressed in written text. It noticed that, on microblogging websites (Facebook, YouTube, Twitter ), most people used more than one language to express their emotions. The change of one language to another language within the same written text is called code-mixing. In this research, we gathered the English-Punjabi code-mixed corpus from micro-blogging websites. We have performed language identification of code-mix text, which includes Phonetic Typing, Abbreviation, Wordplay, Intentionally misspelled words and Slang words. Then we performed tokenization of English and Punjabi language words consisting of different spellings. Then we performed sentiment analysis based on the above text based on the lexicon approach. The dictionary created for English Punjabi code mixed consists of opinionated words. The opinionated words are then categorized into three categories i.e. positive words list, negative words list, and neutral words list. The rest of the words are being stored in an unsorted word list. By using the N-gram approach, a statistical technique is applied at sentence level sentiment polarity of the English-Punjabi code-mixed dataset. Our results show an accuracy of 83{\%} with an F-1 measure of 77{\%}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="singh-goyal-2020-sentiment">
<titleInfo>
<title>Sentiment Analysis of English-Punjabi Code-Mixed Social Media Content</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mukhtiar</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vishal</namePart>
<namePart type="family">Goyal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Conference on Natural Language Processing (ICON): System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Vishal</namePart>
<namePart type="family">Goyal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asif</namePart>
<namePart type="family">Ekbal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>NLP Association of India (NLPAI)</publisher>
<place>
<placeTerm type="text">Patna, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sentiment analysis is a field of study for analyzing people’s emotions, such as Nice, Happy, ਦੁਖੀ (sad), changa (Good), etc. towards the entities and attributes expressed in written text. It noticed that, on microblogging websites (Facebook, YouTube, Twitter ), most people used more than one language to express their emotions. The change of one language to another language within the same written text is called code-mixing. In this research, we gathered the English-Punjabi code-mixed corpus from micro-blogging websites. We have performed language identification of code-mix text, which includes Phonetic Typing, Abbreviation, Wordplay, Intentionally misspelled words and Slang words. Then we performed tokenization of English and Punjabi language words consisting of different spellings. Then we performed sentiment analysis based on the above text based on the lexicon approach. The dictionary created for English Punjabi code mixed consists of opinionated words. The opinionated words are then categorized into three categories i.e. positive words list, negative words list, and neutral words list. The rest of the words are being stored in an unsorted word list. By using the N-gram approach, a statistical technique is applied at sentence level sentiment polarity of the English-Punjabi code-mixed dataset. Our results show an accuracy of 83% with an F-1 measure of 77%.</abstract>
<identifier type="citekey">singh-goyal-2020-sentiment</identifier>
<location>
<url>https://aclanthology.org/2020.icon-demos.9</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>24</start>
<end>25</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Sentiment Analysis of English-Punjabi Code-Mixed Social Media Content
%A Singh, Mukhtiar
%A Goyal, Vishal
%Y Goyal, Vishal
%Y Ekbal, Asif
%S Proceedings of the 17th International Conference on Natural Language Processing (ICON): System Demonstrations
%D 2020
%8 December
%I NLP Association of India (NLPAI)
%C Patna, India
%F singh-goyal-2020-sentiment
%X Sentiment analysis is a field of study for analyzing people’s emotions, such as Nice, Happy, ਦੁਖੀ (sad), changa (Good), etc. towards the entities and attributes expressed in written text. It noticed that, on microblogging websites (Facebook, YouTube, Twitter ), most people used more than one language to express their emotions. The change of one language to another language within the same written text is called code-mixing. In this research, we gathered the English-Punjabi code-mixed corpus from micro-blogging websites. We have performed language identification of code-mix text, which includes Phonetic Typing, Abbreviation, Wordplay, Intentionally misspelled words and Slang words. Then we performed tokenization of English and Punjabi language words consisting of different spellings. Then we performed sentiment analysis based on the above text based on the lexicon approach. The dictionary created for English Punjabi code mixed consists of opinionated words. The opinionated words are then categorized into three categories i.e. positive words list, negative words list, and neutral words list. The rest of the words are being stored in an unsorted word list. By using the N-gram approach, a statistical technique is applied at sentence level sentiment polarity of the English-Punjabi code-mixed dataset. Our results show an accuracy of 83% with an F-1 measure of 77%.
%U https://aclanthology.org/2020.icon-demos.9
%P 24-25
Markdown (Informal)
[Sentiment Analysis of English-Punjabi Code-Mixed Social Media Content](https://aclanthology.org/2020.icon-demos.9) (Singh & Goyal, ICON 2020)
ACL