% HindiMD (LREC 2022) conference paper.
% The first author is a mononym, so it is wrapped in braces to be parsed as a
% single indivisible name.
% NOTE(review): the citation key starts with "-" (presumably because the
% surname slot of the key came out empty for the mononym author); it is left
% unchanged so that existing citations keep resolving.
@inproceedings{-etal-2022-hindimd,
    title     = {{HindiMD}: A Multi-domain Corpora for Low-resource Sentiment Analysis},
    author    = {{Mamta} and
                 Ekbal, Asif and
                 Bhattacharyya, Pushpak and
                 Saha, Tista and
                 Kumar, Alka and
                 Srivastava, Shikha},
    editor    = {Calzolari, Nicoletta and
                 B{\'e}chet, Fr{\'e}d{\'e}ric and
                 Blache, Philippe and
                 Choukri, Khalid and
                 Cieri, Christopher and
                 Declerck, Thierry and
                 Goggi, Sara and
                 Isahara, Hitoshi and
                 Maegaard, Bente and
                 Mariani, Joseph and
                 Mazo, H{\'e}l{\`e}ne and
                 Odijk, Jan and
                 Piperidis, Stelios},
    booktitle = {Proceedings of the Thirteenth Language Resources and Evaluation Conference},
    month     = jun,
    year      = {2022},
    address   = {Marseille, France},
    publisher = {European Language Resources Association},
    url       = {https://aclanthology.org/2022.lrec-1.764},
    pages     = {7061--7070},
    abstract  = {Social media platforms such as Twitter have evolved into a vast information sharing platform, allowing people from a variety of backgrounds and expertise to share their opinions on numerous events such as terrorism, narcotics and many other social issues. People sometimes misuse the power of social media for their agendas, such as illegal trades and negatively influencing others. Because of this, sentiment analysis has won the interest of a lot of researchers to widely analyze public opinion for social media monitoring. Several benchmark datasets for sentiment analysis across a range of domains have been made available, especially for high-resource languages. A few datasets are available for low-resource Indian languages like Hindi, such as movie reviews and product reviews, which do not address the current need for social media monitoring. In this paper, we address the challenges of sentiment analysis in Hindi and socially relevant domains by introducing a balanced corpus annotated with the sentiment classes, viz. positive, negative and neutral. To show the effective usage of the dataset, we build several deep learning based models and establish them as the baselines for further research in this direction.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the HindiMD paper: six authors on the article itself,
     thirteen editors on the host proceedings volume. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="-etal-2022-hindimd">
    <titleInfo>
      <title>HindiMD: A Multi-domain Corpora for Low-resource Sentiment Analysis</title>
    </titleInfo>
    <!-- Mononym author: a single untyped namePart, but still marked as a
         personal name so it is classified like the other authors. -->
    <name type="personal">
      <namePart>Mamta</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Asif</namePart>
      <namePart type="family">Ekbal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Pushpak</namePart>
      <namePart type="family">Bhattacharyya</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tista</namePart>
      <namePart type="family">Saha</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alka</namePart>
      <namePart type="family">Kumar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shikha</namePart>
      <namePart type="family">Srivastava</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Thirteenth Language Resources and Evaluation Conference</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Frédéric</namePart>
        <namePart type="family">Béchet</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Philippe</namePart>
        <namePart type="family">Blache</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Khalid</namePart>
        <namePart type="family">Choukri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Christopher</namePart>
        <namePart type="family">Cieri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Thierry</namePart>
        <namePart type="family">Declerck</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sara</namePart>
        <namePart type="family">Goggi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hitoshi</namePart>
        <namePart type="family">Isahara</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bente</namePart>
        <namePart type="family">Maegaard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joseph</namePart>
        <namePart type="family">Mariani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Hélène</namePart>
        <namePart type="family">Mazo</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jan</namePart>
        <namePart type="family">Odijk</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stelios</namePart>
        <namePart type="family">Piperidis</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association</publisher>
        <place>
          <placeTerm type="text">Marseille, France</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Social media platforms such as Twitter have evolved into a vast information sharing platform, allowing people from a variety of backgrounds and expertise to share their opinions on numerous events such as terrorism, narcotics and many other social issues. People sometimes misuse the power of social media for their agendas, such as illegal trades and negatively influencing others. Because of this, sentiment analysis has won the interest of a lot of researchers to widely analyze public opinion for social media monitoring. Several benchmark datasets for sentiment analysis across a range of domains have been made available, especially for high-resource languages. A few datasets are available for low-resource Indian languages like Hindi, such as movie reviews and product reviews, which do not address the current need for social media monitoring. In this paper, we address the challenges of sentiment analysis in Hindi and socially relevant domains by introducing a balanced corpus annotated with the sentiment classes, viz. positive, negative and neutral. To show the effective usage of the dataset, we build several deep learning based models and establish them as the baselines for further research in this direction.</abstract>
    <identifier type="citekey">-etal-2022-hindimd</identifier>
    <location>
      <url>https://aclanthology.org/2022.lrec-1.764</url>
    </location>
    <part>
      <date>2022-06</date>
      <extent unit="page">
        <start>7061</start>
        <end>7070</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T HindiMD: A Multi-domain Corpora for Low-resource Sentiment Analysis
%A Mamta
%A Ekbal, Asif
%A Bhattacharyya, Pushpak
%A Saha, Tista
%A Kumar, Alka
%A Srivastava, Shikha
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Thirteenth Language Resources and Evaluation Conference
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F -etal-2022-hindimd
%X Social media platforms such as Twitter have evolved into a vast information sharing platform, allowing people from a variety of backgrounds and expertise to share their opinions on numerous events such as terrorism, narcotics and many other social issues. People sometimes misuse the power of social media for their agendas, such as illegal trades and negatively influencing others. Because of this, sentiment analysis has won the interest of a lot of researchers to widely analyze public opinion for social media monitoring. Several benchmark datasets for sentiment analysis across a range of domains have been made available, especially for high-resource languages. A few datasets are available for low-resource Indian languages like Hindi, such as movie reviews and product reviews, which do not address the current need for social media monitoring. In this paper, we address the challenges of sentiment analysis in Hindi and socially relevant domains by introducing a balanced corpus annotated with the sentiment classes, viz. positive, negative and neutral. To show the effective usage of the dataset, we build several deep learning based models and establish them as the baselines for further research in this direction.
%U https://aclanthology.org/2022.lrec-1.764
%P 7061-7070
Markdown (Informal)
[HindiMD: A Multi-domain Corpora for Low-resource Sentiment Analysis](https://aclanthology.org/2022.lrec-1.764) (Mamta et al., LREC 2022)
ACL