@inproceedings{gaanoun-benelallam-2020-arabic,
title = "{A}rabic dialect identification: An {A}rabic-{BERT} model with data augmentation and ensembling strategy",
author = "Gaanoun, Kamel and
Benelallam, Imade",
editor = "Zitouni, Imed and
Abdul-Mageed, Muhammad and
Bouamor, Houda and
Bougares, Fethi and
El-Haj, Mahmoud and
Tomeh, Nadi and
Zaghouani, Wajdi",
booktitle = "Proceedings of the Fifth Arabic Natural Language Processing Workshop",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.wanlp-1.28/",
pages = "275--281",
abstract = "This paper presents the ArabicProcessors team's deep learning system designed for the NADI 2020 Subtask 1 (country-level dialect identification) and Subtask 2 (province-level dialect identification). We used Arabic-Bert in combination with data augmentation and ensembling methods. Unlabeled data provided by task organizers (10 Million tweets) was split into multiple subparts, to which we applied semi-supervised learning method, and finally ran a specific ensembling process on the resulting models. This system ranked 3rd in Subtask 1 with 23.26{\%} F1-score and 2nd in Subtask 2 with 5.75{\%} F1-score."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gaanoun-benelallam-2020-arabic">
<titleInfo>
<title>Arabic dialect identification: An Arabic-BERT model with data augmentation and ensembling strategy</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kamel</namePart>
<namePart type="family">Gaanoun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Imade</namePart>
<namePart type="family">Benelallam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Arabic Natural Language Processing Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Imed</namePart>
<namePart type="family">Zitouni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Muhammad</namePart>
<namePart type="family">Abdul-Mageed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fethi</namePart>
<namePart type="family">Bougares</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mahmoud</namePart>
<namePart type="family">El-Haj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadi</namePart>
<namePart type="family">Tomeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wajdi</namePart>
<namePart type="family">Zaghouani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents the ArabicProcessors team's deep learning system designed for the NADI 2020 Subtask 1 (country-level dialect identification) and Subtask 2 (province-level dialect identification). We used Arabic-Bert in combination with data augmentation and ensembling methods. Unlabeled data provided by task organizers (10 Million tweets) was split into multiple subparts, to which we applied semi-supervised learning method, and finally ran a specific ensembling process on the resulting models. This system ranked 3rd in Subtask 1 with 23.26% F1-score and 2nd in Subtask 2 with 5.75% F1-score.</abstract>
<identifier type="citekey">gaanoun-benelallam-2020-arabic</identifier>
<location>
<url>https://aclanthology.org/2020.wanlp-1.28/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>275</start>
<end>281</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Arabic dialect identification: An Arabic-BERT model with data augmentation and ensembling strategy
%A Gaanoun, Kamel
%A Benelallam, Imade
%Y Zitouni, Imed
%Y Abdul-Mageed, Muhammad
%Y Bouamor, Houda
%Y Bougares, Fethi
%Y El-Haj, Mahmoud
%Y Tomeh, Nadi
%Y Zaghouani, Wajdi
%S Proceedings of the Fifth Arabic Natural Language Processing Workshop
%D 2020
%8 December
%I Association for Computational Linguistics
%C Barcelona, Spain (Online)
%F gaanoun-benelallam-2020-arabic
%X This paper presents the ArabicProcessors team's deep learning system designed for the NADI 2020 Subtask 1 (country-level dialect identification) and Subtask 2 (province-level dialect identification). We used Arabic-Bert in combination with data augmentation and ensembling methods. Unlabeled data provided by task organizers (10 Million tweets) was split into multiple subparts, to which we applied semi-supervised learning method, and finally ran a specific ensembling process on the resulting models. This system ranked 3rd in Subtask 1 with 23.26% F1-score and 2nd in Subtask 2 with 5.75% F1-score.
%U https://aclanthology.org/2020.wanlp-1.28/
%P 275-281
Markdown (Informal)
[Arabic dialect identification: An Arabic-BERT model with data augmentation and ensembling strategy](https://aclanthology.org/2020.wanlp-1.28/) (Gaanoun & Benelallam, WANLP 2020)
ACL