@inproceedings{b-etal-2024-mucs,
title = "{MUCS}@{D}ravidian{L}ang{T}ech-2024: A Grid Search Approach to Explore Sentiment Analysis in Code-mixed {T}amil and {T}ulu",
author = "B, Prathvi and
K, Manavi and
K, Subrahmanyapoojary and
Hegde, Asha and
G, Kavya and
Shashirekha, Hosahalli",
editor = "Chakravarthi, Bharathi Raja and
Priyadharshini, Ruba and
Madasamy, Anand Kumar and
Thavareesan, Sajeetha and
Sherly, Elizabeth and
Nadarajan, Rajeswari and
Ravikiran, Manikandan",
booktitle = "Proceedings of the Fourth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages",
month = mar,
year = "2024",
address = "St. Julian's, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.dravidianlangtech-1.43",
pages = "257--261",
abstract = "Sentiment Analysis (SA) is a field of computational study that analyzes and understands people{'}s opinions, attitudes, and emotions toward any entity. A review of an entity can be written about an individual, an event, a topic, a product, etc., and such reviews are abundant on social media platforms. The increasing number of social media users and the growing amount of user-generated code-mixed content such as reviews, comments, posts etc., on social media have resulted in a rising demand for efficient tools capable of effectively analyzing such content to detect the sentiments. In spite of this, SA of social media text is challenging because the code-mixed text is complex. To address SA in code-mixed Tamil and Tulu text, this paper describes the Machine Learning (ML) models submitted by our team - MUCS to {``}Sentiment Analysis in Tamil and Tulu - Dravidian- LangTech{''} - a shared task organized at European Chapter of the Association for Computational Linguistics (EACL) 2024. Linear Support Vector classifier (LinearSVC) and ensemble of 5 ML classifiers (k Nearest Neighbour (kNN), Stochastic Gradient Descent (SGD), Logistic Regression (LR), LinearSVC, and Random Forest Classifier (RFC)) with hard voting trained using concatenated features obtained from word and character n-ngrams vectoized from Term Frequency-Inverse Document Frequency (TF-IDF) vectorizer and CountVectorizer. Further, Gridsearch algorithm is employed to obtain optimal hyperparameter values.The proposed ensemble model obtained macro F1 scores of 0.260 and 0.550 for Tamil and Tulu languages respectively.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="b-etal-2024-mucs">
<titleInfo>
<title>MUCS@DravidianLangTech-2024: A Grid Search Approach to Explore Sentiment Analysis in Code-mixed Tamil and Tulu</title>
</titleInfo>
<name type="personal">
<namePart type="given">Prathvi</namePart>
<namePart type="family">B</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manavi</namePart>
<namePart type="family">K</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Subrahmanyapoojary</namePart>
<namePart type="family">K</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asha</namePart>
<namePart type="family">Hegde</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kavya</namePart>
<namePart type="family">G</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hosahalli</namePart>
<namePart type="family">Shashirekha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bharathi</namePart>
<namePart type="given">Raja</namePart>
<namePart type="family">Chakravarthi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ruba</namePart>
<namePart type="family">Priyadharshini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anand</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Madasamy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sajeetha</namePart>
<namePart type="family">Thavareesan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Sherly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rajeswari</namePart>
<namePart type="family">Nadarajan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manikandan</namePart>
<namePart type="family">Ravikiran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julian’s, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sentiment Analysis (SA) is a field of computational study that analyzes and understands people’s opinions, attitudes, and emotions toward any entity. A review of an entity can be written about an individual, an event, a topic, a product, etc., and such reviews are abundant on social media platforms. The increasing number of social media users and the growing amount of user-generated code-mixed content such as reviews, comments, posts etc., on social media have resulted in a rising demand for efficient tools capable of effectively analyzing such content to detect the sentiments. In spite of this, SA of social media text is challenging because the code-mixed text is complex. To address SA in code-mixed Tamil and Tulu text, this paper describes the Machine Learning (ML) models submitted by our team - MUCS to “Sentiment Analysis in Tamil and Tulu - Dravidian- LangTech” - a shared task organized at European Chapter of the Association for Computational Linguistics (EACL) 2024. Linear Support Vector classifier (LinearSVC) and ensemble of 5 ML classifiers (k Nearest Neighbour (kNN), Stochastic Gradient Descent (SGD), Logistic Regression (LR), LinearSVC, and Random Forest Classifier (RFC)) with hard voting trained using concatenated features obtained from word and character n-ngrams vectoized from Term Frequency-Inverse Document Frequency (TF-IDF) vectorizer and CountVectorizer. Further, Gridsearch algorithm is employed to obtain optimal hyperparameter values.The proposed ensemble model obtained macro F1 scores of 0.260 and 0.550 for Tamil and Tulu languages respectively.</abstract>
<identifier type="citekey">b-etal-2024-mucs</identifier>
<location>
<url>https://aclanthology.org/2024.dravidianlangtech-1.43</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>257</start>
<end>261</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MUCS@DravidianLangTech-2024: A Grid Search Approach to Explore Sentiment Analysis in Code-mixed Tamil and Tulu
%A B, Prathvi
%A K, Manavi
%A K, Subrahmanyapoojary
%A Hegde, Asha
%A G, Kavya
%A Shashirekha, Hosahalli
%Y Chakravarthi, Bharathi Raja
%Y Priyadharshini, Ruba
%Y Madasamy, Anand Kumar
%Y Thavareesan, Sajeetha
%Y Sherly, Elizabeth
%Y Nadarajan, Rajeswari
%Y Ravikiran, Manikandan
%S Proceedings of the Fourth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F b-etal-2024-mucs
%X Sentiment Analysis (SA) is a field of computational study that analyzes and understands people’s opinions, attitudes, and emotions toward any entity. A review of an entity can be written about an individual, an event, a topic, a product, etc., and such reviews are abundant on social media platforms. The increasing number of social media users and the growing amount of user-generated code-mixed content such as reviews, comments, posts etc., on social media have resulted in a rising demand for efficient tools capable of effectively analyzing such content to detect the sentiments. In spite of this, SA of social media text is challenging because the code-mixed text is complex. To address SA in code-mixed Tamil and Tulu text, this paper describes the Machine Learning (ML) models submitted by our team - MUCS to “Sentiment Analysis in Tamil and Tulu - Dravidian- LangTech” - a shared task organized at European Chapter of the Association for Computational Linguistics (EACL) 2024. Linear Support Vector classifier (LinearSVC) and ensemble of 5 ML classifiers (k Nearest Neighbour (kNN), Stochastic Gradient Descent (SGD), Logistic Regression (LR), LinearSVC, and Random Forest Classifier (RFC)) with hard voting trained using concatenated features obtained from word and character n-ngrams vectoized from Term Frequency-Inverse Document Frequency (TF-IDF) vectorizer and CountVectorizer. Further, Gridsearch algorithm is employed to obtain optimal hyperparameter values.The proposed ensemble model obtained macro F1 scores of 0.260 and 0.550 for Tamil and Tulu languages respectively.
%U https://aclanthology.org/2024.dravidianlangtech-1.43
%P 257-261
Markdown (Informal)
[MUCS@DravidianLangTech-2024: A Grid Search Approach to Explore Sentiment Analysis in Code-mixed Tamil and Tulu](https://aclanthology.org/2024.dravidianlangtech-1.43) (B et al., DravidianLangTech-WS 2024)
ACL
- Prathvi B, Manavi K, Subrahmanyapoojary K, Asha Hegde, Kavya G, and Hosahalli Shashirekha. 2024. MUCS@DravidianLangTech-2024: A Grid Search Approach to Explore Sentiment Analysis in Code-mixed Tamil and Tulu. In Proceedings of the Fourth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages, pages 257–261, St. Julian's, Malta. Association for Computational Linguistics.