@inproceedings{kusampudi-etal-2021-sentiment,
title = "Sentiment Analysis in Code-Mixed {T}elugu-{E}nglish Text with Unsupervised Data Normalization",
author = "Kusampudi, Siva Subrahamanyam Varma and
Sathineni, Preetham and
Mamidi, Radhika",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)",
month = sep,
year = "2021",
address = "Held Online",
publisher = "INCOMA Ltd.",
url = "https://aclanthology.org/2021.ranlp-1.86",
pages = "753--760",
abstract = "In a multilingual society, people communicate in more than one language, leading to Code-Mixed data. Sentimental analysis on Code-Mixed Telugu-English Text (CMTET) poses unique challenges. The unstructured nature of the Code-Mixed Data is due to the informal language, informal transliterations, and spelling errors. In this paper, we introduce an annotated dataset for Sentiment Analysis in CMTET. Also, we report an accuracy of 80.22{\%} on this dataset using novel unsupervised data normalization with a Multilayer Perceptron (MLP) model. This proposed data normalization technique can be extended to any NLP task involving CMTET. Further, we report an increase of 2.53{\%} accuracy due to this data normalization approach in our best model.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kusampudi-etal-2021-sentiment">
<titleInfo>
<title>Sentiment Analysis in Code-Mixed Telugu-English Text with Unsupervised Data Normalization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Siva</namePart>
<namePart type="given">Subrahamanyam</namePart>
<namePart type="given">Varma</namePart>
<namePart type="family">Kusampudi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preetham</namePart>
<namePart type="family">Sathineni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Radhika</namePart>
<namePart type="family">Mamidi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Held Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In a multilingual society, people communicate in more than one language, leading to Code-Mixed data. Sentimental analysis on Code-Mixed Telugu-English Text (CMTET) poses unique challenges. The unstructured nature of the Code-Mixed Data is due to the informal language, informal transliterations, and spelling errors. In this paper, we introduce an annotated dataset for Sentiment Analysis in CMTET. Also, we report an accuracy of 80.22% on this dataset using novel unsupervised data normalization with a Multilayer Perceptron (MLP) model. This proposed data normalization technique can be extended to any NLP task involving CMTET. Further, we report an increase of 2.53% accuracy due to this data normalization approach in our best model.</abstract>
<identifier type="citekey">kusampudi-etal-2021-sentiment</identifier>
<location>
<url>https://aclanthology.org/2021.ranlp-1.86</url>
</location>
<part>
<date>2021-09</date>
<extent unit="page">
<start>753</start>
<end>760</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Sentiment Analysis in Code-Mixed Telugu-English Text with Unsupervised Data Normalization
%A Kusampudi, Siva Subrahamanyam Varma
%A Sathineni, Preetham
%A Mamidi, Radhika
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)
%D 2021
%8 September
%I INCOMA Ltd.
%C Held Online
%F kusampudi-etal-2021-sentiment
%X In a multilingual society, people communicate in more than one language, leading to Code-Mixed data. Sentimental analysis on Code-Mixed Telugu-English Text (CMTET) poses unique challenges. The unstructured nature of the Code-Mixed Data is due to the informal language, informal transliterations, and spelling errors. In this paper, we introduce an annotated dataset for Sentiment Analysis in CMTET. Also, we report an accuracy of 80.22% on this dataset using novel unsupervised data normalization with a Multilayer Perceptron (MLP) model. This proposed data normalization technique can be extended to any NLP task involving CMTET. Further, we report an increase of 2.53% accuracy due to this data normalization approach in our best model.
%U https://aclanthology.org/2021.ranlp-1.86
%P 753-760
Markdown (Informal)
[Sentiment Analysis in Code-Mixed Telugu-English Text with Unsupervised Data Normalization](https://aclanthology.org/2021.ranlp-1.86) (Kusampudi et al., RANLP 2021)
ACL