@inproceedings{aalabdulsalam-2022-squ,
title = "{SQU}-{CS} @ {NADI} 2022: Dialectal {A}rabic Identification using One-vs-One Classification with {TF}-{IDF} Weights Computed on Character n-grams",
author = "AAlAbdulsalam, Abdulrahman Khalifa",
editor = "Bouamor, Houda and
Al-Khalifa, Hend and
Darwish, Kareem and
Rambow, Owen and
Bougares, Fethi and
Abdelali, Ahmed and
Tomeh, Nadi and
Khalifa, Salam and
Zaghouani, Wajdi",
booktitle = "Proceedings of the Seventh Arabic Natural Language Processing Workshop (WANLP)",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.wanlp-1.45",
doi = "10.18653/v1/2022.wanlp-1.45",
pages = "436--441",
abstract = "In this paper, I present an approach using one-vs-one classification scheme with TF-IDF term weighting on character n-grams for identifying Arabic dialects used in social media. The scheme was evaluated in the context of the third Nuanced Arabic Dialect Identification (NADI 2022) shared task for identifying Arabic dialects used in Twitter messages. The approach was implemented with logistic regression loss and trained using stochastic gradient decent (SGD) algorithm. This simple method achieved a macro F1 score of 22.89{\%} and 10.83{\%} on TEST A and TEST B, respectively, in comparison to an approach based on AraBERT pretrained transformer model which achieved a macro F1 score of 30.01{\%} and 14.84{\%}, respectively. My submission based on AraBERT scored a macro F1 average of 22.42{\%} and was ranked 10 out of the 19 teams who participated in the task.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="aalabdulsalam-2022-squ">
<titleInfo>
<title>SQU-CS @ NADI 2022: Dialectal Arabic Identification using One-vs-One Classification with TF-IDF Weights Computed on Character n-grams</title>
</titleInfo>
<name type="personal">
<namePart type="given">Abdulrahman</namePart>
<namePart type="given">Khalifa</namePart>
<namePart type="family">AAlAbdulsalam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh Arabic Natural Language Processing Workshop (WANLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hend</namePart>
<namePart type="family">Al-Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kareem</namePart>
<namePart type="family">Darwish</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Owen</namePart>
<namePart type="family">Rambow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fethi</namePart>
<namePart type="family">Bougares</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Abdelali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadi</namePart>
<namePart type="family">Tomeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salam</namePart>
<namePart type="family">Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wajdi</namePart>
<namePart type="family">Zaghouani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, I present an approach using one-vs-one classification scheme with TF-IDF term weighting on character n-grams for identifying Arabic dialects used in social media. The scheme was evaluated in the context of the third Nuanced Arabic Dialect Identification (NADI 2022) shared task for identifying Arabic dialects used in Twitter messages. The approach was implemented with logistic regression loss and trained using stochastic gradient decent (SGD) algorithm. This simple method achieved a macro F1 score of 22.89% and 10.83% on TEST A and TEST B, respectively, in comparison to an approach based on AraBERT pretrained transformer model which achieved a macro F1 score of 30.01% and 14.84%, respectively. My submission based on AraBERT scored a macro F1 average of 22.42% and was ranked 10 out of the 19 teams who participated in the task.</abstract>
<identifier type="citekey">aalabdulsalam-2022-squ</identifier>
<identifier type="doi">10.18653/v1/2022.wanlp-1.45</identifier>
<location>
<url>https://aclanthology.org/2022.wanlp-1.45</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>436</start>
<end>441</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T SQU-CS @ NADI 2022: Dialectal Arabic Identification using One-vs-One Classification with TF-IDF Weights Computed on Character n-grams
%A AAlAbdulsalam, Abdulrahman Khalifa
%Y Bouamor, Houda
%Y Al-Khalifa, Hend
%Y Darwish, Kareem
%Y Rambow, Owen
%Y Bougares, Fethi
%Y Abdelali, Ahmed
%Y Tomeh, Nadi
%Y Khalifa, Salam
%Y Zaghouani, Wajdi
%S Proceedings of the Seventh Arabic Natural Language Processing Workshop (WANLP)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F aalabdulsalam-2022-squ
%X In this paper, I present an approach using one-vs-one classification scheme with TF-IDF term weighting on character n-grams for identifying Arabic dialects used in social media. The scheme was evaluated in the context of the third Nuanced Arabic Dialect Identification (NADI 2022) shared task for identifying Arabic dialects used in Twitter messages. The approach was implemented with logistic regression loss and trained using stochastic gradient decent (SGD) algorithm. This simple method achieved a macro F1 score of 22.89% and 10.83% on TEST A and TEST B, respectively, in comparison to an approach based on AraBERT pretrained transformer model which achieved a macro F1 score of 30.01% and 14.84%, respectively. My submission based on AraBERT scored a macro F1 average of 22.42% and was ranked 10 out of the 19 teams who participated in the task.
%R 10.18653/v1/2022.wanlp-1.45
%U https://aclanthology.org/2022.wanlp-1.45
%U https://doi.org/10.18653/v1/2022.wanlp-1.45
%P 436-441
Markdown (Informal)
[SQU-CS @ NADI 2022: Dialectal Arabic Identification using One-vs-One Classification with TF-IDF Weights Computed on Character n-grams](https://aclanthology.org/2022.wanlp-1.45) (AAlAbdulsalam, WANLP 2022)
ACL