@inproceedings{alshenaifi-azmi-2022-arabic,
title = "{A}rabic dialect identification using machine learning and transformer-based models: Submission to the {NADI} 2022 Shared Task",
author = "AlShenaifi, Nouf and
Azmi, Aqil",
editor = "Bouamor, Houda and
Al-Khalifa, Hend and
Darwish, Kareem and
Rambow, Owen and
Bougares, Fethi and
Abdelali, Ahmed and
Tomeh, Nadi and
Khalifa, Salam and
Zaghouani, Wajdi",
booktitle = "Proceedings of the Seventh Arabic Natural Language Processing Workshop (WANLP)",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.wanlp-1.50/",
doi = "10.18653/v1/2022.wanlp-1.50",
pages = "464--467",
abstract = "Arabic has a wide range of dialects. Dialect is the language variation of a specific community. In this paper, we show the models we created to participate in the third Nuanced Arabic Dialect Identification (NADI) shared task (Subtask 1) that involves developing a system to classify a tweet into a country-level dialect. We utilized a number of machine learning techniques as well as deep learning transformer-based models. For the machine learning approach, we build an ensemble classifier of various machine learning models. In our deep learning approach, we consider bidirectional LSTM model and AraBERT pretrained model. The results demonstrate that the deep learning approach performs noticeably better than the other machine learning approaches with 68.7{\%} accuracy on the development set."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alshenaifi-azmi-2022-arabic">
<titleInfo>
<title>Arabic dialect identification using machine learning and transformer-based models: Submission to the NADI 2022 Shared Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nouf</namePart>
<namePart type="family">AlShenaifi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aqil</namePart>
<namePart type="family">Azmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh Arabic Natural Language Processing Workshop (WANLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kareem</namePart>
<namePart type="family">Darwish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fethi</namePart>
<namePart type="family">Bougares</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Abdelali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadi</namePart>
<namePart type="family">Tomeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salam</namePart>
<namePart type="family">Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wajdi</namePart>
<namePart type="family">Zaghouani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Arabic has a wide range of dialects. Dialect is the language variation of a specific community. In this paper, we show the models we created to participate in the third Nuanced Arabic Dialect Identification (NADI) shared task (Subtask 1) that involves developing a system to classify a tweet into a country-level dialect. We utilized a number of machine learning techniques as well as deep learning transformer-based models. For the machine learning approach, we build an ensemble classifier of various machine learning models. In our deep learning approach, we consider bidirectional LSTM model and AraBERT pretrained model. The results demonstrate that the deep learning approach performs noticeably better than the other machine learning approaches with 68.7% accuracy on the development set.</abstract>
<identifier type="citekey">alshenaifi-azmi-2022-arabic</identifier>
<identifier type="doi">10.18653/v1/2022.wanlp-1.50</identifier>
<location>
<url>https://aclanthology.org/2022.wanlp-1.50/</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>464</start>
<end>467</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Arabic dialect identification using machine learning and transformer-based models: Submission to the NADI 2022 Shared Task
%A AlShenaifi, Nouf
%A Azmi, Aqil
%Y Bouamor, Houda
%Y Al-Khalifa, Hend
%Y Darwish, Kareem
%Y Rambow, Owen
%Y Bougares, Fethi
%Y Abdelali, Ahmed
%Y Tomeh, Nadi
%Y Khalifa, Salam
%Y Zaghouani, Wajdi
%S Proceedings of the Seventh Arabic Natural Language Processing Workshop (WANLP)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F alshenaifi-azmi-2022-arabic
%X Arabic has a wide range of dialects. Dialect is the language variation of a specific community. In this paper, we show the models we created to participate in the third Nuanced Arabic Dialect Identification (NADI) shared task (Subtask 1) that involves developing a system to classify a tweet into a country-level dialect. We utilized a number of machine learning techniques as well as deep learning transformer-based models. For the machine learning approach, we build an ensemble classifier of various machine learning models. In our deep learning approach, we consider bidirectional LSTM model and AraBERT pretrained model. The results demonstrate that the deep learning approach performs noticeably better than the other machine learning approaches with 68.7% accuracy on the development set.
%R 10.18653/v1/2022.wanlp-1.50
%U https://aclanthology.org/2022.wanlp-1.50/
%U https://doi.org/10.18653/v1/2022.wanlp-1.50
%P 464-467
Markdown (Informal)
[Arabic dialect identification using machine learning and transformer-based models: Submission to the NADI 2022 Shared Task](https://aclanthology.org/2022.wanlp-1.50/) (AlShenaifi & Azmi, WANLP 2022)
ACL