@inproceedings{khondaker-etal-2024-benchmarking,
title = "Benchmarking {LL}a{MA}-3 on {A}rabic Language Generation Tasks",
author = "Khondaker, Md Tawkat Islam and
Naeem, Numaan and
Khan, Fatimah and
Elmadany, AbdelRahim and
Abdul-Mageed, Muhammad",
editor = "Habash, Nizar and
Bouamor, Houda and
Eskander, Ramy and
Tomeh, Nadi and
Abu Farha, Ibrahim and
Abdelali, Ahmed and
Touileb, Samia and
Hamed, Injy and
Onaizan, Yaser and
Alhafni, Bashar and
Antoun, Wissam and
Khalifa, Salam and
Haddad, Hatem and
Zitouni, Imed and
AlKhamissi, Badr and
Almatham, Rawan and
Mrini, Khalil",
booktitle = "Proceedings of the Second Arabic Natural Language Processing Conference",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.arabicnlp-1.24/",
doi = "10.18653/v1/2024.arabicnlp-1.24",
pages = "283--297",
abstract = "Open-sourced large language models (LLMs) have exhibited remarkable performance in a variety of NLP tasks, often catching up with the closed-sourced LLMs like ChatGPT. Among these open LLMs, LLaMA-3-70B has emerged as the most recent and the most prominent one. However, how LLaMA-3-70B would situate itself in multilingual settings, especially in a rich morphological language like Arabic, has yet to be explored. In this work, we focus to bridge this gap by evaluating LLaMA-3-70B on a diverse set of Arabic natural language generation (NLG) benchmarks. To the best of our knowledge, this is the first study that comprehensively evaluates LLaMA-3-70B on tasks related to Arabic natural language generation. Our study reveals that LLaMA-3-70B lags behind the closed LLMs like ChatGPT, both in modern standard Arabic (MSA) and dialectal Arabic (DA). We further compare the performance of LLaMA-3-70B with our smaller and dedicated finetuned Arabic models. We find that both LLaMA-3-70B and ChatGPT are outperformed by comparatively smaller dedicated Arabic models, indicating the scope for potential improvement with Arabic-focused LLMs."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="khondaker-etal-2024-benchmarking">
<titleInfo>
<title>Benchmarking LLaMA-3 on Arabic Language Generation Tasks</title>
</titleInfo>
<name type="personal">
<namePart type="given">Md</namePart>
<namePart type="given">Tawkat</namePart>
<namePart type="given">Islam</namePart>
<namePart type="family">Khondaker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Numaan</namePart>
<namePart type="family">Naeem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fatimah</namePart>
<namePart type="family">Khan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">AbdelRahim</namePart>
<namePart type="family">Elmadany</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Muhammad</namePart>
<namePart type="family">Abdul-Mageed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Second Arabic Natural Language Processing Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nizar</namePart>
<namePart type="family">Habash</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ramy</namePart>
<namePart type="family">Eskander</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadi</namePart>
<namePart type="family">Tomeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ibrahim</namePart>
<namePart type="family">Abu Farha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Abdelali</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Samia</namePart>
<namePart type="family">Touileb</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Injy</namePart>
<namePart type="family">Hamed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bashar</namePart>
<namePart type="family">Alhafni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wissam</namePart>
<namePart type="family">Antoun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salam</namePart>
<namePart type="family">Khalifa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hatem</namePart>
<namePart type="family">Haddad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Imed</namePart>
<namePart type="family">Zitouni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Badr</namePart>
<namePart type="family">AlKhamissi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rawan</namePart>
<namePart type="family">Almatham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalil</namePart>
<namePart type="family">Mrini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Bangkok, Thailand</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Open-sourced large language models (LLMs) have exhibited remarkable performance in a variety of NLP tasks, often catching up with the closed-sourced LLMs like ChatGPT. Among these open LLMs, LLaMA-3-70B has emerged as the most recent and the most prominent one. However, how LLaMA-3-70B would situate itself in multilingual settings, especially in a rich morphological language like Arabic, has yet to be explored. In this work, we focus to bridge this gap by evaluating LLaMA-3-70B on a diverse set of Arabic natural language generation (NLG) benchmarks. To the best of our knowledge, this is the first study that comprehensively evaluates LLaMA-3-70B on tasks related to Arabic natural language generation. Our study reveals that LLaMA-3-70B lags behind the closed LLMs like ChatGPT, both in modern standard Arabic (MSA) and dialectal Arabic (DA). We further compare the performance of LLaMA-3-70B with our smaller and dedicated finetuned Arabic models. We find that both LLaMA-3-70B and ChatGPT are outperformed by comparatively smaller dedicated Arabic models, indicating the scope for potential improvement with Arabic-focused LLMs.</abstract>
<identifier type="citekey">khondaker-etal-2024-benchmarking</identifier>
<identifier type="doi">10.18653/v1/2024.arabicnlp-1.24</identifier>
<location>
<url>https://aclanthology.org/2024.arabicnlp-1.24/</url>
</location>
<part>
<date>2024-08</date>
<extent unit="page">
<start>283</start>
<end>297</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Benchmarking LLaMA-3 on Arabic Language Generation Tasks
%A Khondaker, Md Tawkat Islam
%A Naeem, Numaan
%A Khan, Fatimah
%A Elmadany, AbdelRahim
%A Abdul-Mageed, Muhammad
%Y Habash, Nizar
%Y Bouamor, Houda
%Y Eskander, Ramy
%Y Tomeh, Nadi
%Y Abu Farha, Ibrahim
%Y Abdelali, Ahmed
%Y Touileb, Samia
%Y Hamed, Injy
%Y Onaizan, Yaser
%Y Alhafni, Bashar
%Y Antoun, Wissam
%Y Khalifa, Salam
%Y Haddad, Hatem
%Y Zitouni, Imed
%Y AlKhamissi, Badr
%Y Almatham, Rawan
%Y Mrini, Khalil
%S Proceedings of the Second Arabic Natural Language Processing Conference
%D 2024
%8 August
%I Association for Computational Linguistics
%C Bangkok, Thailand
%F khondaker-etal-2024-benchmarking
%X Open-sourced large language models (LLMs) have exhibited remarkable performance in a variety of NLP tasks, often catching up with the closed-sourced LLMs like ChatGPT. Among these open LLMs, LLaMA-3-70B has emerged as the most recent and the most prominent one. However, how LLaMA-3-70B would situate itself in multilingual settings, especially in a rich morphological language like Arabic, has yet to be explored. In this work, we focus to bridge this gap by evaluating LLaMA-3-70B on a diverse set of Arabic natural language generation (NLG) benchmarks. To the best of our knowledge, this is the first study that comprehensively evaluates LLaMA-3-70B on tasks related to Arabic natural language generation. Our study reveals that LLaMA-3-70B lags behind the closed LLMs like ChatGPT, both in modern standard Arabic (MSA) and dialectal Arabic (DA). We further compare the performance of LLaMA-3-70B with our smaller and dedicated finetuned Arabic models. We find that both LLaMA-3-70B and ChatGPT are outperformed by comparatively smaller dedicated Arabic models, indicating the scope for potential improvement with Arabic-focused LLMs.
%R 10.18653/v1/2024.arabicnlp-1.24
%U https://aclanthology.org/2024.arabicnlp-1.24/
%U https://doi.org/10.18653/v1/2024.arabicnlp-1.24
%P 283-297
Markdown (Informal)
[Benchmarking LLaMA-3 on Arabic Language Generation Tasks](https://aclanthology.org/2024.arabicnlp-1.24/) (Khondaker et al., ArabicNLP 2024)
ACL
- Md Tawkat Islam Khondaker, Numaan Naeem, Fatimah Khan, AbdelRahim Elmadany, and Muhammad Abdul-Mageed. 2024. Benchmarking LLaMA-3 on Arabic Language Generation Tasks. In Proceedings of the Second Arabic Natural Language Processing Conference, pages 283–297, Bangkok, Thailand. Association for Computational Linguistics.