@inproceedings{hatekar-abdo-2023-iunadi,
title = "{IUNADI} at {NADI} 2023 shared task: Country-level {A}rabic Dialect Classification in Tweets for the Shared Task {NADI} 2023",
author = "Hatekar, Yash and
Abdo, Muhammad",
editor = "Sawaf, Hassan and
El-Beltagy, Samhaa and
Zaghouani, Wajdi and
Magdy, Walid and
Abdelali, Ahmed and
Tomeh, Nadi and
Abu Farha, Ibrahim and
Habash, Nizar and
Khalifa, Salam and
Keleg, Amr and
Haddad, Hatem and
Zitouni, Imed and
Mrini, Khalil and
Almatham, Rawan",
booktitle = "Proceedings of ArabicNLP 2023",
month = dec,
year = "2023",
address = "Singapore (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.arabicnlp-1.72",
doi = "10.18653/v1/2023.arabicnlp-1.72",
pages = "665--669",
abstract = "In this paper, we describe our participation in the NADI2023 shared task for the classification of Arabic dialects in tweets. For training, evaluation, and testing purposes, a primary dataset comprising tweets from 18 Arab countries is provided, along with three older datasets. The main objective is to develop a model capable of classifying tweets from these 18 countries. We outline our approach, which leverages various machine learning models. Our experiments demonstrate that large language models, particularly Arabertv2-Large, Arabertv2-Base, and CAMeLBERT-Mix DID MADAR, consistently outperform traditional methods such as SVM, XGBOOST, Multinomial Naive Bayes, AdaBoost, and Random Forests.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hatekar-abdo-2023-iunadi">
<titleInfo>
<title>IUNADI at NADI 2023 shared task: Country-level Arabic Dialect Classification in Tweets for the Shared Task NADI 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yash</namePart>
<namePart type="family">Hatekar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Muhammad</namePart>
<namePart type="family">Abdo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of ArabicNLP 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hassan</namePart>
<namePart type="family">Sawaf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samhaa</namePart>
<namePart type="family">El-Beltagy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wajdi</namePart>
<namePart type="family">Zaghouani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Walid</namePart>
<namePart type="family">Magdy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Abdelali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadi</namePart>
<namePart type="family">Tomeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ibrahim</namePart>
<namePart type="family">Abu Farha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nizar</namePart>
<namePart type="family">Habash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salam</namePart>
<namePart type="family">Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amr</namePart>
<namePart type="family">Keleg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hatem</namePart>
<namePart type="family">Haddad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Imed</namePart>
<namePart type="family">Zitouni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalil</namePart>
<namePart type="family">Mrini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rawan</namePart>
<namePart type="family">Almatham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we describe our participation in the NADI2023 shared task for the classification of Arabic dialects in tweets. For training, evaluation, and testing purposes, a primary dataset comprising tweets from 18 Arab countries is provided, along with three older datasets. The main objective is to develop a model capable of classifying tweets from these 18 countries. We outline our approach, which leverages various machine learning models. Our experiments demonstrate that large language models, particularly Arabertv2-Large, Arabertv2-Base, and CAMeLBERT-Mix DID MADAR, consistently outperform traditional methods such as SVM, XGBOOST, Multinomial Naive Bayes, AdaBoost, and Random Forests.</abstract>
<identifier type="citekey">hatekar-abdo-2023-iunadi</identifier>
<identifier type="doi">10.18653/v1/2023.arabicnlp-1.72</identifier>
<location>
<url>https://aclanthology.org/2023.arabicnlp-1.72</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>665</start>
<end>669</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T IUNADI at NADI 2023 shared task: Country-level Arabic Dialect Classification in Tweets for the Shared Task NADI 2023
%A Hatekar, Yash
%A Abdo, Muhammad
%Y Sawaf, Hassan
%Y El-Beltagy, Samhaa
%Y Zaghouani, Wajdi
%Y Magdy, Walid
%Y Abdelali, Ahmed
%Y Tomeh, Nadi
%Y Abu Farha, Ibrahim
%Y Habash, Nizar
%Y Khalifa, Salam
%Y Keleg, Amr
%Y Haddad, Hatem
%Y Zitouni, Imed
%Y Mrini, Khalil
%Y Almatham, Rawan
%S Proceedings of ArabicNLP 2023
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore (Hybrid)
%F hatekar-abdo-2023-iunadi
%X In this paper, we describe our participation in the NADI2023 shared task for the classification of Arabic dialects in tweets. For training, evaluation, and testing purposes, a primary dataset comprising tweets from 18 Arab countries is provided, along with three older datasets. The main objective is to develop a model capable of classifying tweets from these 18 countries. We outline our approach, which leverages various machine learning models. Our experiments demonstrate that large language models, particularly Arabertv2-Large, Arabertv2-Base, and CAMeLBERT-Mix DID MADAR, consistently outperform traditional methods such as SVM, XGBOOST, Multinomial Naive Bayes, AdaBoost, and Random Forests.
%R 10.18653/v1/2023.arabicnlp-1.72
%U https://aclanthology.org/2023.arabicnlp-1.72
%U https://doi.org/10.18653/v1/2023.arabicnlp-1.72
%P 665-669
Markdown (Informal)
[IUNADI at NADI 2023 shared task: Country-level Arabic Dialect Classification in Tweets for the Shared Task NADI 2023](https://aclanthology.org/2023.arabicnlp-1.72) (Hatekar & Abdo, ArabicNLP-WS 2023)
ACL