BibTeX
@inproceedings{alghamdi-etal-2023-aramus,
    title = "{A}ra{MUS}: Pushing the Limits of Data and Model Scale for {A}rabic Natural Language Processing",
    author = "Alghamdi, Asaad and
      Duan, Xinyu and
      Jiang, Wei and
      Wang, Zhenhai and
      Wu, Yimeng and
      Xia, Qingrong and
      Wang, Zhefeng and
      Zheng, Yi and
      Rezagholizadeh, Mehdi and
      Huai, Baoxing and
      Cheng, Peilun and
      Ghaddar, Abbas",
    editor = "Rogers, Anna and
      Boyd-Graber, Jordan and
      Okazaki, Naoaki",
    booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
    month = jul,
    year = "2023",
    address = "Toronto, Canada",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2023.findings-acl.181",
    doi = "10.18653/v1/2023.findings-acl.181",
    pages = "2883--2894",
    abstract = "Developing monolingual large Pre-trained Language Models (PLMs) is shown to be very successful in handling different tasks in Natural Language Processing (NLP). In this work, we present AraMUS, the largest Arabic PLM with 11B parameters trained on 529GB of high-quality Arabic textual data. AraMUS achieves state-of-the-art performances on a diverse set of Arabic classification and generative tasks. Moreover, AraMUS shows impressive few-shot learning abilities compared with the best existing Arabic PLMs.",
}
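
To use the entry above, save it to a .bib file and cite it by its key. A minimal LaTeX sketch, assuming the record is stored in a file named references.bib (the filename and the plain bibliography style are illustrative assumptions, not prescribed by this page):

\documentclass{article}
\begin{document}
% Cite by the citekey from the BibTeX record above.
AraMUS~\cite{alghamdi-etal-2023-aramus} is an 11B-parameter Arabic PLM
trained on 529GB of text.
\bibliographystyle{plain} % assumed style; ACL submissions ship their own
\bibliography{references} % references.bib is an assumed filename
\end{document}

Compile with pdflatex, then bibtex, then pdflatex twice so the citation resolves.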
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alghamdi-etal-2023-aramus">
<titleInfo>
<title>AraMUS: Pushing the Limits of Data and Model Scale for Arabic Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Asaad</namePart>
<namePart type="family">Alghamdi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xinyu</namePart>
<namePart type="family">Duan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wei</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhenhai</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yimeng</namePart>
<namePart type="family">Wu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qingrong</namePart>
<namePart type="family">Xia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhefeng</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yi</namePart>
<namePart type="family">Zheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mehdi</namePart>
<namePart type="family">Rezagholizadeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Baoxing</namePart>
<namePart type="family">Huai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peilun</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abbas</namePart>
<namePart type="family">Ghaddar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Developing monolingual large Pre-trained Language Models (PLMs) is shown to be very successful in handling different tasks in Natural Language Processing (NLP). In this work, we present AraMUS, the largest Arabic PLM with 11B parameters trained on 529GB of high-quality Arabic textual data. AraMUS achieves state-of-the-art performances on a diverse set of Arabic classification and generative tasks. Moreover, AraMUS shows impressive few-shot learning abilities compared with the best existing Arabic PLMs.</abstract>
<identifier type="citekey">alghamdi-etal-2023-aramus</identifier>
<identifier type="doi">10.18653/v1/2023.findings-acl.181</identifier>
<location>
<url>https://aclanthology.org/2023.findings-acl.181</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>2883</start>
<end>2894</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T AraMUS: Pushing the Limits of Data and Model Scale for Arabic Natural Language Processing
%A Alghamdi, Asaad
%A Duan, Xinyu
%A Jiang, Wei
%A Wang, Zhenhai
%A Wu, Yimeng
%A Xia, Qingrong
%A Wang, Zhefeng
%A Zheng, Yi
%A Rezagholizadeh, Mehdi
%A Huai, Baoxing
%A Cheng, Peilun
%A Ghaddar, Abbas
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F alghamdi-etal-2023-aramus
%X Developing monolingual large Pre-trained Language Models (PLMs) is shown to be very successful in handling different tasks in Natural Language Processing (NLP). In this work, we present AraMUS, the largest Arabic PLM with 11B parameters trained on 529GB of high-quality Arabic textual data. AraMUS achieves state-of-the-art performances on a diverse set of Arabic classification and generative tasks. Moreover, AraMUS shows impressive few-shot learning abilities compared with the best existing Arabic PLMs.
%R 10.18653/v1/2023.findings-acl.181
%U https://aclanthology.org/2023.findings-acl.181
%U https://doi.org/10.18653/v1/2023.findings-acl.181
%P 2883-2894
Markdown (Informal)
[AraMUS: Pushing the Limits of Data and Model Scale for Arabic Natural Language Processing](https://aclanthology.org/2023.findings-acl.181) (Alghamdi et al., Findings 2023)
ACL
- Asaad Alghamdi, Xinyu Duan, Wei Jiang, Zhenhai Wang, Yimeng Wu, Qingrong Xia, Zhefeng Wang, Yi Zheng, Mehdi Rezagholizadeh, Baoxing Huai, Peilun Cheng, and Abbas Ghaddar. 2023. AraMUS: Pushing the Limits of Data and Model Scale for Arabic Natural Language Processing. In Findings of the Association for Computational Linguistics: ACL 2023, pages 2883–2894, Toronto, Canada. Association for Computational Linguistics.