@inproceedings{kumar-etal-2024-indisentiment140,
title = "{I}ndi{S}entiment140: Sentiment Analysis Dataset for {I}ndian Languages with Emphasis on Low-Resource Languages using Machine Translation",
author = "Kumar, Saurabh and
Sanasam, Ranbir and
Nandi, Sukumar",
editor = "Duh, Kevin and
Gomez, Helena and
Bethard, Steven",
booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.naacl-long.425",
pages = "7682--7691",
abstract = "Sentiment analysis, a fundamental aspect of Natural Language Processing (NLP), involves the classification of emotions, opinions, and attitudes in text data. In the context of India, with its vast linguistic diversity and low-resource languages, the challenge is to support sentiment analysis in numerous Indian languages. This study explores the use of machine translation to bridge this gap. The investigation examines the feasibility of machine translation for creating sentiment analysis datasets in 22 Indian languages. Google Translate, with its extensive language support, is employed for this purpose in translating the Sentiment140 dataset. The study aims to provide insights into the practicality of using machine translation in the context of India{'}s linguistic diversity for sentiment analysis datasets. Our findings indicate that a dataset generated using Google Translate has the potential to serve as a foundational framework for tackling the low-resource challenges commonly encountered in sentiment analysis for Indian languages.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kumar-etal-2024-indisentiment140">
<titleInfo>
<title>IndiSentiment140: Sentiment Analysis Dataset for Indian Languages with Emphasis on Low-Resource Languages using Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Saurabh</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ranbir</namePart>
<namePart type="family">Sanasam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sukumar</namePart>
<namePart type="family">Nandi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helena</namePart>
<namePart type="family">Gomez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steven</namePart>
<namePart type="family">Bethard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sentiment analysis, a fundamental aspect of Natural Language Processing (NLP), involves the classification of emotions, opinions, and attitudes in text data. In the context of India, with its vast linguistic diversity and low-resource languages, the challenge is to support sentiment analysis in numerous Indian languages. This study explores the use of machine translation to bridge this gap. The investigation examines the feasibility of machine translation for creating sentiment analysis datasets in 22 Indian languages. Google Translate, with its extensive language support, is employed for this purpose in translating the Sentiment140 dataset. The study aims to provide insights into the practicality of using machine translation in the context of India’s linguistic diversity for sentiment analysis datasets. Our findings indicate that a dataset generated using Google Translate has the potential to serve as a foundational framework for tackling the low-resource challenges commonly encountered in sentiment analysis for Indian languages.</abstract>
<identifier type="citekey">kumar-etal-2024-indisentiment140</identifier>
<location>
<url>https://aclanthology.org/2024.naacl-long.425</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>7682</start>
<end>7691</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T IndiSentiment140: Sentiment Analysis Dataset for Indian Languages with Emphasis on Low-Resource Languages using Machine Translation
%A Kumar, Saurabh
%A Sanasam, Ranbir
%A Nandi, Sukumar
%Y Duh, Kevin
%Y Gomez, Helena
%Y Bethard, Steven
%S Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 1: Long Papers)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F kumar-etal-2024-indisentiment140
%X Sentiment analysis, a fundamental aspect of Natural Language Processing (NLP), involves the classification of emotions, opinions, and attitudes in text data. In the context of India, with its vast linguistic diversity and low-resource languages, the challenge is to support sentiment analysis in numerous Indian languages. This study explores the use of machine translation to bridge this gap. The investigation examines the feasibility of machine translation for creating sentiment analysis datasets in 22 Indian languages. Google Translate, with its extensive language support, is employed for this purpose in translating the Sentiment140 dataset. The study aims to provide insights into the practicality of using machine translation in the context of India’s linguistic diversity for sentiment analysis datasets. Our findings indicate that a dataset generated using Google Translate has the potential to serve as a foundational framework for tackling the low-resource challenges commonly encountered in sentiment analysis for Indian languages.
%U https://aclanthology.org/2024.naacl-long.425
%P 7682-7691
Markdown (Informal)
[IndiSentiment140: Sentiment Analysis Dataset for Indian Languages with Emphasis on Low-Resource Languages using Machine Translation](https://aclanthology.org/2024.naacl-long.425) (Kumar et al., NAACL 2024)
ACL