@inproceedings{sazzed-2020-cross,
title = "Cross-lingual sentiment classification in low-resource {B}engali language",
author = "Sazzed, Salim",
editor = "Xu, Wei and
Ritter, Alan and
Baldwin, Tim and
Rahimi, Afshin",
booktitle = "Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.wnut-1.8",
doi = "10.18653/v1/2020.wnut-1.8",
pages = "50--60",
abstract = "Sentiment analysis research in low-resource languages such as Bengali is still unexplored due to the scarcity of annotated data and the lack of text processing tools. Therefore, in this work, we focus on generating resources and showing the applicability of the cross-lingual sentiment analysis approach in Bengali. For benchmarking, we created and annotated a comprehensive corpus of around 12000 Bengali reviews. To address the lack of standard text-processing tools in Bengali, we leverage resources from English utilizing machine translation. We determine the performance of supervised machine learning (ML) classifiers in machine-translated English corpus and compare it with the original Bengali corpus. Besides, we examine sentiment preservation in the machine-translated corpus utilizing Cohen{'}s Kappa and Gwet{'}s AC1. To circumvent the laborious data labeling process, we explore lexicon-based methods and study the applicability of utilizing cross-domain labeled data from the resource-rich language. We find that supervised ML classifiers show comparable performances in Bengali and machine-translated English corpus. By utilizing labeled data, they achieve 15{\%}-20{\%} higher F1 scores compared to both lexicon-based and transfer learning-based methods. Besides, we observe that machine translation does not alter the sentiment polarity of the review for most of the cases. Our experimental results demonstrate that the machine translation based cross-lingual approach can be an effective way for sentiment classification in Bengali.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sazzed-2020-cross">
<titleInfo>
<title>Cross-lingual sentiment classification in low-resource Bengali language</title>
</titleInfo>
<name type="personal">
<namePart type="given">Salim</namePart>
<namePart type="family">Sazzed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Xu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alan</namePart>
<namePart type="family">Ritter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tim</namePart>
<namePart type="family">Baldwin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Afshin</namePart>
<namePart type="family">Rahimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sentiment analysis research in low-resource languages such as Bengali is still unexplored due to the scarcity of annotated data and the lack of text processing tools. Therefore, in this work, we focus on generating resources and showing the applicability of the cross-lingual sentiment analysis approach in Bengali. For benchmarking, we created and annotated a comprehensive corpus of around 12000 Bengali reviews. To address the lack of standard text-processing tools in Bengali, we leverage resources from English utilizing machine translation. We determine the performance of supervised machine learning (ML) classifiers in machine-translated English corpus and compare it with the original Bengali corpus. Besides, we examine sentiment preservation in the machine-translated corpus utilizing Cohen’s Kappa and Gwet’s AC1. To circumvent the laborious data labeling process, we explore lexicon-based methods and study the applicability of utilizing cross-domain labeled data from the resource-rich language. We find that supervised ML classifiers show comparable performances in Bengali and machine-translated English corpus. By utilizing labeled data, they achieve 15%-20% higher F1 scores compared to both lexicon-based and transfer learning-based methods. Besides, we observe that machine translation does not alter the sentiment polarity of the review for most of the cases. Our experimental results demonstrate that the machine translation based cross-lingual approach can be an effective way for sentiment classification in Bengali.</abstract>
<identifier type="citekey">sazzed-2020-cross</identifier>
<identifier type="doi">10.18653/v1/2020.wnut-1.8</identifier>
<location>
<url>https://aclanthology.org/2020.wnut-1.8</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>50</start>
<end>60</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Cross-lingual sentiment classification in low-resource Bengali language
%A Sazzed, Salim
%Y Xu, Wei
%Y Ritter, Alan
%Y Baldwin, Tim
%Y Rahimi, Afshin
%S Proceedings of the Sixth Workshop on Noisy User-generated Text (W-NUT 2020)
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F sazzed-2020-cross
%X Sentiment analysis research in low-resource languages such as Bengali is still unexplored due to the scarcity of annotated data and the lack of text processing tools. Therefore, in this work, we focus on generating resources and showing the applicability of the cross-lingual sentiment analysis approach in Bengali. For benchmarking, we created and annotated a comprehensive corpus of around 12000 Bengali reviews. To address the lack of standard text-processing tools in Bengali, we leverage resources from English utilizing machine translation. We determine the performance of supervised machine learning (ML) classifiers in machine-translated English corpus and compare it with the original Bengali corpus. Besides, we examine sentiment preservation in the machine-translated corpus utilizing Cohen’s Kappa and Gwet’s AC1. To circumvent the laborious data labeling process, we explore lexicon-based methods and study the applicability of utilizing cross-domain labeled data from the resource-rich language. We find that supervised ML classifiers show comparable performances in Bengali and machine-translated English corpus. By utilizing labeled data, they achieve 15%-20% higher F1 scores compared to both lexicon-based and transfer learning-based methods. Besides, we observe that machine translation does not alter the sentiment polarity of the review for most of the cases. Our experimental results demonstrate that the machine translation based cross-lingual approach can be an effective way for sentiment classification in Bengali.
%R 10.18653/v1/2020.wnut-1.8
%U https://aclanthology.org/2020.wnut-1.8
%U https://doi.org/10.18653/v1/2020.wnut-1.8
%P 50-60
Markdown (Informal)
[Cross-lingual sentiment classification in low-resource Bengali language](https://aclanthology.org/2020.wnut-1.8) (Sazzed, WNUT 2020)
ACL