@inproceedings{abu-kwaik-etal-2020-arabic,
title = "An {A}rabic Tweets Sentiment Analysis Dataset ({ATSAD}) using Distant Supervision and Self Training",
author = "Abu Kwaik, Kathrein and
Chatzikyriakidis, Stergios and
Dobnik, Simon and
Saad, Motaz and
Johansson, Richard",
editor = "Al-Khalifa, Hend and
Magdy, Walid and
Darwish, Kareem and
Elsayed, Tamer and
Mubarak, Hamdy",
booktitle = "Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resource Association",
url = "https://aclanthology.org/2020.osact-1.1",
pages = "1--8",
abstract = "As the number of social media users increases, they express their thoughts, needs, socialise and publish their opinions reviews. For good social media sentiment analysis, good quality resources are needed, and the lack of these resources is particularly evident for languages other than English, in particular Arabic. The available Arabic resources lack of from either the size of the corpus or the quality of the annotation. In this paper, we present an Arabic Sentiment Analysis Corpus collected from Twitter, which contains 36K tweets labelled into positive and negative. We employed distant supervision and self-training approaches into the corpus to annotate it. Besides, we release an 8K tweets manually annotated as a gold standard. We evaluated the corpus intrinsically by comparing it to human classification and pre-trained sentiment analysis models, Moreover, we apply extrinsic evaluation methods exploiting sentiment analysis task and achieve an accuracy of 86{\%}.",
language = "English",
ISBN = "979-10-95546-51-1",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="abu-kwaik-etal-2020-arabic">
<titleInfo>
<title>An Arabic Tweets Sentiment Analysis Dataset (ATSAD) using Distant Supervision and Self Training</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kathrein</namePart>
<namePart type="family">Abu Kwaik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stergios</namePart>
<namePart type="family">Chatzikyriakidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simon</namePart>
<namePart type="family">Dobnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Motaz</namePart>
<namePart type="family">Saad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Richard</namePart>
<namePart type="family">Johansson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Walid</namePart>
<namePart type="family">Magdy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kareem</namePart>
<namePart type="family">Darwish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tamer</namePart>
<namePart type="family">Elsayed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hamdy</namePart>
<namePart type="family">Mubarak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resource Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-51-1</identifier>
</relatedItem>
<abstract>As the number of social media users increases, they express their thoughts, needs, socialise and publish their opinions reviews. For good social media sentiment analysis, good quality resources are needed, and the lack of these resources is particularly evident for languages other than English, in particular Arabic. The available Arabic resources lack of from either the size of the corpus or the quality of the annotation. In this paper, we present an Arabic Sentiment Analysis Corpus collected from Twitter, which contains 36K tweets labelled into positive and negative. We employed distant supervision and self-training approaches into the corpus to annotate it. Besides, we release an 8K tweets manually annotated as a gold standard. We evaluated the corpus intrinsically by comparing it to human classification and pre-trained sentiment analysis models, Moreover, we apply extrinsic evaluation methods exploiting sentiment analysis task and achieve an accuracy of 86%.</abstract>
<identifier type="citekey">abu-kwaik-etal-2020-arabic</identifier>
<location>
<url>https://aclanthology.org/2020.osact-1.1</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>1</start>
<end>8</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Arabic Tweets Sentiment Analysis Dataset (ATSAD) using Distant Supervision and Self Training
%A Abu Kwaik, Kathrein
%A Chatzikyriakidis, Stergios
%A Dobnik, Simon
%A Saad, Motaz
%A Johansson, Richard
%Y Al-Khalifa, Hend
%Y Magdy, Walid
%Y Darwish, Kareem
%Y Elsayed, Tamer
%Y Mubarak, Hamdy
%S Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection
%D 2020
%8 May
%I European Language Resource Association
%C Marseille, France
%@ 979-10-95546-51-1
%G English
%F abu-kwaik-etal-2020-arabic
%X As the number of social media users increases, they express their thoughts, needs, socialise and publish their opinions reviews. For good social media sentiment analysis, good quality resources are needed, and the lack of these resources is particularly evident for languages other than English, in particular Arabic. The available Arabic resources lack of from either the size of the corpus or the quality of the annotation. In this paper, we present an Arabic Sentiment Analysis Corpus collected from Twitter, which contains 36K tweets labelled into positive and negative. We employed distant supervision and self-training approaches into the corpus to annotate it. Besides, we release an 8K tweets manually annotated as a gold standard. We evaluated the corpus intrinsically by comparing it to human classification and pre-trained sentiment analysis models, Moreover, we apply extrinsic evaluation methods exploiting sentiment analysis task and achieve an accuracy of 86%.
%U https://aclanthology.org/2020.osact-1.1
%P 1-8
Markdown (Informal)
[An Arabic Tweets Sentiment Analysis Dataset (ATSAD) using Distant Supervision and Self Training](https://aclanthology.org/2020.osact-1.1) (Abu Kwaik et al., OSACT 2020)
ACL