@inproceedings{fourati-etal-2021-introducing,
title = "Introducing A large {T}unisian {A}rabizi Dialectal Dataset for Sentiment Analysis",
author = "Fourati, Chayma and
Haddad, Hatem and
Messaoudi, Abir and
BenHajhmida, Moez and
Ben Elhaj Mabrouk, Aymen and
Naski, Malek",
editor = "Habash, Nizar and
Bouamor, Houda and
Hajj, Hazem and
Magdy, Walid and
Zaghouani, Wajdi and
Bougares, Fethi and
Tomeh, Nadi and
Abu Farha, Ibrahim and
Touileb, Samia",
booktitle = "Proceedings of the Sixth Arabic Natural Language Processing Workshop",
month = apr,
year = "2021",
address = "Kyiv, Ukraine (Virtual)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.wanlp-1.25",
pages = "226--230",
abstract = "On various Social Media platforms, people, tend to use the informal way to communicate, or write posts and comments: their local dialects. In Africa, more than 1500 dialects and languages exist. Particularly, Tunisians talk and write informally using Latin letters and numbers rather than Arabic ones. In this paper, we introduce a large common-crawl-based Tunisian Arabizi dialectal dataset dedicated for Sentiment Analysis. The dataset consists of a total of 100k comments (about movies, politic, sport, etc.) annotated manually by Tunisian native speakers as Positive, negative and Neutral. We evaluate our dataset on sentiment analysis task using the Bidirectional Encoder Representations from Transformers (BERT) as a contextual language model in its multilingual version (mBERT) as an embedding technique then combining mBERT with Convolutional Neural Network (CNN) as classifier. The dataset is publicly available.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fourati-etal-2021-introducing">
<titleInfo>
<title>Introducing A large Tunisian Arabizi Dialectal Dataset for Sentiment Analysis</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chayma</namePart>
<namePart type="family">Fourati</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hatem</namePart>
<namePart type="family">Haddad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abir</namePart>
<namePart type="family">Messaoudi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Moez</namePart>
<namePart type="family">BenHajhmida</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aymen</namePart>
<namePart type="family">Ben Elhaj Mabrouk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malek</namePart>
<namePart type="family">Naski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth Arabic Natural Language Processing Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nizar</namePart>
<namePart type="family">Habash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hazem</namePart>
<namePart type="family">Hajj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Walid</namePart>
<namePart type="family">Magdy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wajdi</namePart>
<namePart type="family">Zaghouani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fethi</namePart>
<namePart type="family">Bougares</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadi</namePart>
<namePart type="family">Tomeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ibrahim</namePart>
<namePart type="family">Abu Farha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samia</namePart>
<namePart type="family">Touileb</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Kyiv, Ukraine (Virtual)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>On various Social Media platforms, people, tend to use the informal way to communicate, or write posts and comments: their local dialects. In Africa, more than 1500 dialects and languages exist. Particularly, Tunisians talk and write informally using Latin letters and numbers rather than Arabic ones. In this paper, we introduce a large common-crawl-based Tunisian Arabizi dialectal dataset dedicated for Sentiment Analysis. The dataset consists of a total of 100k comments (about movies, politic, sport, etc.) annotated manually by Tunisian native speakers as Positive, negative and Neutral. We evaluate our dataset on sentiment analysis task using the Bidirectional Encoder Representations from Transformers (BERT) as a contextual language model in its multilingual version (mBERT) as an embedding technique then combining mBERT with Convolutional Neural Network (CNN) as classifier. The dataset is publicly available.</abstract>
<identifier type="citekey">fourati-etal-2021-introducing</identifier>
<location>
<url>https://aclanthology.org/2021.wanlp-1.25</url>
</location>
<part>
<date>2021-04</date>
<extent unit="page">
<start>226</start>
<end>230</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Introducing A large Tunisian Arabizi Dialectal Dataset for Sentiment Analysis
%A Fourati, Chayma
%A Haddad, Hatem
%A Messaoudi, Abir
%A BenHajhmida, Moez
%A Ben Elhaj Mabrouk, Aymen
%A Naski, Malek
%Y Habash, Nizar
%Y Bouamor, Houda
%Y Hajj, Hazem
%Y Magdy, Walid
%Y Zaghouani, Wajdi
%Y Bougares, Fethi
%Y Tomeh, Nadi
%Y Abu Farha, Ibrahim
%Y Touileb, Samia
%S Proceedings of the Sixth Arabic Natural Language Processing Workshop
%D 2021
%8 April
%I Association for Computational Linguistics
%C Kyiv, Ukraine (Virtual)
%F fourati-etal-2021-introducing
%X On various Social Media platforms, people, tend to use the informal way to communicate, or write posts and comments: their local dialects. In Africa, more than 1500 dialects and languages exist. Particularly, Tunisians talk and write informally using Latin letters and numbers rather than Arabic ones. In this paper, we introduce a large common-crawl-based Tunisian Arabizi dialectal dataset dedicated for Sentiment Analysis. The dataset consists of a total of 100k comments (about movies, politic, sport, etc.) annotated manually by Tunisian native speakers as Positive, negative and Neutral. We evaluate our dataset on sentiment analysis task using the Bidirectional Encoder Representations from Transformers (BERT) as a contextual language model in its multilingual version (mBERT) as an embedding technique then combining mBERT with Convolutional Neural Network (CNN) as classifier. The dataset is publicly available.
%U https://aclanthology.org/2021.wanlp-1.25
%P 226-230
Markdown (Informal)
[Introducing A large Tunisian Arabizi Dialectal Dataset for Sentiment Analysis](https://aclanthology.org/2021.wanlp-1.25) (Fourati et al., WANLP 2021)
ACL